nginx-config-formatter/nginxfmt.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""This Python script formats nginx configuration files in consistent way.

Originally published under https://github.com/1connect/nginx-config-formatter
"""

import argparse
import codecs

import re

__author__ = "Michał Słomkowski"
__license__ = "Apache 2.0"
__version__ = "1.0.2"

# Whitespace unit (one tab character) emitted once per nesting level; perform_indentation
# also repeats it as the separator between a directive and its arguments.
INDENTATION = '	'

# Placeholders that temporarily stand in for the "${" and "}" of template variables
# (see apply_variable_template_tags / strip_variable_template_tags), so the braces
# of "${name}" are not treated as block delimiters while lines are being split.
TEMPLATE_VARIABLE_OPENING_TAG = '___TEMPLATE_VARIABLE_OPENING_TAG___'
TEMPLATE_VARIABLE_CLOSING_TAG = '___TEMPLATE_VARIABLE_CLOSING_TAG___'

# Placeholders that temporarily stand in for "{" and "}" found inside quoted strings
# (see apply_bracket_template_tags / strip_bracket_template_tags).
TEMPLATE_BRACKET_OPENING_TAG = '___TEMPLATE_BRACKET_OPENING_TAG___'
TEMPLATE_BRACKET_CLOSING_TAG = '___TEMPLATE_BRACKET_CLOSING_TAG___'


def strip_line(single_line):
    """Trim the line and squeeze whitespace runs outside double quotes into a single space.

    Comment lines (starting with ``#`` once trimmed) are returned trimmed but otherwise
    untouched; text between double quotes is kept verbatim.
    """
    trimmed = single_line.strip()
    if trimmed.startswith('#'):
        return trimmed

    # Splitting on '"' yields segments that alternate between unquoted (even index)
    # and quoted (odd index) text, always starting with an unquoted one.
    segments = trimmed.split('"')
    return '"'.join(
        segment if index % 2 else re.sub(r'[\s]+', ' ', segment)
        for index, segment in enumerate(segments)
    )


def count_multi_semicolon(single_line):
    """Report quoting and count the semicolons that lie outside double quotes.

    Returns a pair ``(q, c)``: ``q`` is 1 when the line contains at least one double
    quote (i.e. some text follows an opening quote) and 0 otherwise; ``c`` is the number
    of ``;`` characters found outside quoted text. Comment lines yield ``(0, 0)``.
    """
    trimmed = single_line.strip()
    if trimmed.startswith('#'):
        return 0, 0

    segments = trimmed.split('"')
    # More than one segment means at least one quote character was present.
    has_quoted_part = 1 if len(segments) > 1 else 0
    # Even-indexed segments are the ones outside quotation marks.
    unquoted_semicolons = sum(segment.count(';') for segment in segments[::2])
    return has_quoted_part, unquoted_semicolons


def multi_semicolon(single_line):
    """Insert a newline after every semicolon that is not inside double quotes.

    Comment lines are returned trimmed and otherwise unchanged.
    """
    trimmed = single_line.strip()
    if trimmed.startswith('#'):
        return trimmed

    rebuilt = []
    for position, chunk in enumerate(trimmed.split('"')):
        # Odd positions are the text enclosed in quotation marks; leave those alone.
        outside_quotes = position % 2 == 0
        rebuilt.append(chunk.replace(";", ";\n") if outside_quotes else chunk)
    return '"'.join(rebuilt)


def apply_variable_template_tags(line: str) -> str:
    """Swap the ``${`` and ``}`` around template variable names for placeholder tags.

    Whitespace between the braces and the variable name is dropped in the process.
    """
    variable_pattern = re.compile(r'\${\s*(\w+)\s*}', flags=re.UNICODE)
    tagged_variable = TEMPLATE_VARIABLE_OPENING_TAG + r"\1" + TEMPLATE_VARIABLE_CLOSING_TAG
    return variable_pattern.sub(tagged_variable, line)


def strip_variable_template_tags(line: str) -> str:
    """Turn placeholder-tagged variable names back into the ``${name}`` form."""
    tagged_pattern = re.compile(
        TEMPLATE_VARIABLE_OPENING_TAG + r'\s*(\w+)\s*' + TEMPLATE_VARIABLE_CLOSING_TAG,
        flags=re.UNICODE)
    return tagged_pattern.sub(r'${\1}', line)


def apply_bracket_template_tags(content: str) -> str:
    """Replaces brackets { and } found inside quoted strings with tags, so subsequent formatting is easier.

    A string is opened by an unescaped ' or " and closed only by the same, unescaped
    quote character. The other quote character appearing inside an open string is
    treated as ordinary text, so "it's {x}" is handled as one string.

    :param content: nginx configuration text, possibly spanning many lines.
    :return: the text with every { and } that lies inside a quoted string replaced by
        TEMPLATE_BRACKET_OPENING_TAG / TEMPLATE_BRACKET_CLOSING_TAG.
    """
    pieces = []
    open_quote = ""  # quote character that opened the current string; "" when outside a string
    last_c = ""

    for c in content:
        if c in ("'", '"') and last_c != "\\":
            if not open_quote:
                open_quote = c
            elif c == open_quote:
                open_quote = ""
            # Otherwise it is the *other* quote character inside an open string:
            # plain text, which must not end the string.

        if open_quote and c == "{":
            pieces.append(TEMPLATE_BRACKET_OPENING_TAG)
        elif open_quote and c == "}":
            pieces.append(TEMPLATE_BRACKET_CLOSING_TAG)
        else:
            pieces.append(c)
        last_c = c

    # Collect pieces and join once instead of growing a string character by character.
    return "".join(pieces)


def reverse_in_quotes_status(status: bool) -> bool:
    """Return the opposite of the given in-quotes flag."""
    return not status


def strip_bracket_template_tags(content: str) -> str:
    """Turn every bracket placeholder tag back into the literal { or } it stands for."""
    restorations = (
        (TEMPLATE_BRACKET_OPENING_TAG, "{"),
        (TEMPLATE_BRACKET_CLOSING_TAG, "}"),
    )
    for tag, bracket in restorations:
        content = content.replace(tag, bracket)
    return content


def clean_lines(orig_lines) -> list:
    """Normalise raw lines and break them up at curly brackets and extra semicolons.

    Each line is stripped first. Blank lines and comments are kept as single entries.
    A line holding more than one unquoted semicolon is split into one statement per
    entry and cleaned recursively. Lines starting with "rewrite" are kept whole;
    every other line is split so that each { and } ends up as its own entry.
    """
    result = []
    for raw_line in orig_lines:
        current = apply_variable_template_tags(strip_line(raw_line))

        if current == "":
            result.append("")
            continue

        if current.startswith("#"):
            result.append(strip_variable_template_tags(current))
            continue

        has_quotes, semicolons = count_multi_semicolon(current)
        if semicolons > 1:
            if has_quotes == 1:
                # Quoted text may itself contain ';', so use the quote-aware splitter.
                statements = multi_semicolon(current).splitlines()
            else:
                statements = [chunk + ";" for chunk in current.split(";") if chunk != ""]
            result.extend(clean_lines(statements))
            continue

        if current.startswith("rewrite"):
            # rewrite lines are not split at brackets.
            result.append(strip_variable_template_tags(current))
            continue

        for fragment in re.split(r"([{}])", current):
            if fragment != "":
                result.append(strip_variable_template_tags(fragment).strip())
    return result


def join_opening_bracket(lines):
    """Attach an opening curly bracket that sits on its own line to the end of the preceding line.

    A lone "{" in the very first position has nothing to attach to and is kept as is.
    """
    merged = []
    for text in lines:
        # `merged` is empty only while looking at the first line.
        if merged and text == "{":
            merged[-1] += " {"
        else:
            merged.append(text)
    return merged


def perform_indentation(lines):
    """Indents the lines according to their nesting level determined by curly brackets.

    Besides indenting, a directive is separated from its arguments by a run of
    INDENTATION units sized so that arguments line up in a common column. The
    following lines are only indented and otherwise emitted unchanged:

    * comments (lines starting with ``#``),
    * lines consisting of a single token (e.g. ``}`` or ``internal;``),
    * lines whose first token is ``}``, ``if`` or ``location``,
    * lines of the form ``name {``.

    :param lines: cleaned lines, one statement or bracket per entry.
    :return: list of indented lines; empty lines stay empty.
    """
    indented_lines = []
    current_indent = 0

    for line in lines:
        is_comment = line.startswith("#")

        # A closing bracket is printed at the level of the block it closes.
        if not is_comment and line.endswith('}') and current_indent > 0:
            current_indent -= 1

        if line == "":
            indented_lines.append("")
        else:
            prefix = current_indent * INDENTATION
            directive = line.split(' ', maxsplit=1)
            keep_as_is = (
                is_comment                      # never re-space comment text
                or len(directive) == 1          # no arguments to align (avoids IndexError)
                or directive[0] in ('}', 'if', 'location')
                or directive[1] == '{'
            )
            if keep_as_is:
                indented_lines.append(prefix + line)
            else:
                # Column arithmetic treats each indentation unit as 8 characters wide.
                # Always keep at least one separator: for long directive names at deep
                # nesting the computed count drops to zero or below, which would glue
                # the directive to its arguments and corrupt the configuration.
                directive_split_space = max(
                    1, int((40 - (current_indent * 8) - len(directive[0])) / 8) + 1)
                indented_lines.append(
                    prefix + directive[0] + directive_split_space * INDENTATION + directive[1])

        if not is_comment and line.endswith('{'):
            current_indent += 1

    return indented_lines


def format_config_contents(contents):
    """Accepts the string containing nginx configuration and returns formatted one. Adds newline at the end.

    :param contents: complete text of an nginx configuration file.
    :return: formatted text. Runs of three or more newlines are collapsed to exactly
        three, one leading newline and the trailing newline(s) are removed, and a
        single newline is appended.
    """
    contents = apply_bracket_template_tags(contents)
    lines = contents.splitlines()
    lines = clean_lines(lines)
    lines = join_opening_bracket(lines)
    lines = perform_indentation(lines)

    text = '\n'.join(lines)
    text = strip_bracket_template_tags(text)

    # The fourth positional parameter of re.sub is `count`, not `flags`. Passing
    # re.MULTILINE there silently capped every substitution at 8 replacements, so
    # only the first 8 runs of excess blank lines were collapsed. The patterns must
    # run WITHOUT re.MULTILINE: with it, r'^\n' would delete every blank line.
    for pattern, substitute in ((r'\n{3,}', '\n\n\n'), (r'^\n', ''), (r'\n$', '')):
        text = re.sub(pattern, substitute, text)

    return text + '\n'


def format_config_file(file_path, original_backup_file_path=None, verbose=True):
    """
    Performs the formatting on the given file. The function tries to detect file encoding first.

    The file is formatted in memory and the backup (if requested) is written before the
    original file is overwritten, so a failure while formatting leaves the original intact.

    :param file_path: path to original nginx configuration file. This file will be overridden.
    :param original_backup_file_path: optional path, where original file will be backed up.
    :param verbose: show messages
    :raises Exception: when the file cannot be decoded with any of the supported encodings.
    """
    encodings = ('utf-8', 'latin1')

    encoding_failures = []
    chosen_encoding = None
    original_file_content = None

    for enc in encodings:
        try:
            with codecs.open(file_path, 'r', encoding=enc) as rfp:
                original_file_content = rfp.read()
        except ValueError as e:  # UnicodeDecodeError is a subclass of ValueError
            encoding_failures.append(e)
        else:
            chosen_encoding = enc
            break

    if chosen_encoding is None:
        # Python 3 exceptions have no `.message` attribute; use str() for the text.
        raise Exception('none of encodings %s are valid for file %s. Errors: %s'
                        % (encodings, file_path, [str(e) for e in encoding_failures]))

    # Format before opening the target for writing: opening with 'w' truncates the
    # file, so formatting inside the write call would lose the original on error.
    formatted_content = format_config_contents(original_file_content)

    if original_backup_file_path:
        with codecs.open(original_backup_file_path, 'w', encoding=chosen_encoding) as wfp:
            wfp.write(original_file_content)

    with codecs.open(file_path, 'w', encoding=chosen_encoding) as wfp:
        wfp.write(formatted_content)

    if verbose:
        print("Formatted file '%s' (detected encoding %s)." % (file_path, chosen_encoding))
        if original_backup_file_path:
            print("Original saved to '%s'." % original_backup_file_path)


if __name__ == "__main__":
    # Command-line entry point: format every configuration file named on the command
    # line in place, optionally keeping the original next to it with a '~' suffix.
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("-v", "--verbose", action="store_true", help="show formatted file names")
    cli.add_argument("-b", "--backup-original", action="store_true", help="backup original config file")
    cli.add_argument("config_files", nargs='+', help="configuration files to format")
    options = cli.parse_args()

    for target_path in options.config_files:
        if options.backup_original:
            backup_target = target_path + '~'
        else:
            backup_target = None
        format_config_file(target_path, backup_target, options.verbose)
Add project bootstrap. 9 years ago			`#!/usr/bin/env python3`
Add support for latin1 encoding of input files. #2 8 years ago			`# -- coding: utf-8 --`
Add project bootstrap. 9 years ago
Add support for variable marks ${ and }. Fixes #4 7 years ago			`"""This Python script formats nginx configuration files in consistent way.`

			`Originally published under https://github.com/1connect/nginx-config-formatter`
			`"""`
Add working implementation. 9 years ago
Add project bootstrap. 9 years ago			`import argparse`
Add support for latin1 encoding of input files. #2 8 years ago			`import codecs`
Add project bootstrap. 9 years ago
Add working implementation. 9 years ago			`import re`

			`__author__ = "Michał Słomkowski"`
			`__license__ = "Apache 2.0"`
Fix error which split line when it contained backslash. Fixes #5 7 years ago			`__version__ = "1.0.2"`
Add working implementation. 9 years ago
Added uniform spacing of directives and their parameters 5 years ago			`INDENTATION = ' '`
Add working implementation. 9 years ago
Add support for variable marks ${ and }. Fixes #4 7 years ago			`TEMPLATE_VARIABLE_OPENING_TAG = '___TEMPLATE_VARIABLE_OPENING_TAG___'`
			`TEMPLATE_VARIABLE_CLOSING_TAG = '___TEMPLATE_VARIABLE_CLOSING_TAG___'`

fix curly bracket in quotes 5 years ago			`TEMPLATE_BRACKET_OPENING_TAG = '___TEMPLATE_BRACKET_OPENING_TAG___'`
			`TEMPLATE_BRACKET_CLOSING_TAG = '___TEMPLATE_BRACKET_CLOSING_TAG___'`
Add working implementation. 9 years ago
Remove spurious __pycache__ dir. Format code. 5 years ago
Add working implementation. 9 years ago			`def strip_line(single_line):`
Add function description and some readme. 9 years ago			`"""Strips the line and replaces neighbouring whitespaces with single space (except when within quotation marks)."""`
Change strip_line to prevent collapsing spaces in comments. 8 years ago			`single_line = single_line.strip()`
			`if single_line.startswith('#'):`
			`return single_line`

Add working implementation. 9 years ago			`within_quotes = False`
			`parts = []`
Change strip_line to prevent collapsing spaces in comments. 8 years ago			`for part in re.split('"', single_line):`
Add working implementation. 9 years ago			`if within_quotes:`
			`parts.append(part)`
			`else:`
			`parts.append(re.sub(r'[\s]+', ' ', part))`
			`within_quotes = not within_quotes`
			`return '"'.join(parts)`

Remove spurious __pycache__ dir. Format code. 5 years ago
fix the multi_semicolon bug 6 years ago			`def count_multi_semicolon(single_line):`
			`"""count multi_semicolon (except when within quotation marks)."""`
			`single_line = single_line.strip()`
			`if single_line.startswith('#'):`
			`return 0, 0`

			`within_quotes = False`
			`q = 0`
			`c = 0`
			`for part in re.split('"', single_line):`
			`if within_quotes:`
			`q = 1`
			`else:`
			`c += part.count(';')`
			`within_quotes = not within_quotes`
			`return q, c`

Remove spurious __pycache__ dir. Format code. 5 years ago
fix the multi_semicolon bug 6 years ago			`def multi_semicolon(single_line):`
			`"""break multi_semicolon into multiline (except when within quotation marks)."""`
			`single_line = single_line.strip()`
			`if single_line.startswith('#'):`
			`return single_line`

			`within_quotes = False`
			`parts = []`
			`for part in re.split('"', single_line):`
			`if within_quotes:`
			`parts.append(part)`
			`else:`
			`parts.append(part.replace(";", ";\n"))`
			`within_quotes = not within_quotes`
			`return '"'.join(parts)`
Add working implementation. 9 years ago
Remove spurious __pycache__ dir. Format code. 5 years ago
Add support for variable marks ${ and }. Fixes #4 7 years ago			`def apply_variable_template_tags(line: str) -> str:`
			`"""Replaces variable indicators ${ and } with tags, so subsequent formatting is easier."""`
			`return re.sub(r'\${\s(\w+)\s}',`
			`TEMPLATE_VARIABLE_OPENING_TAG + r"\1" + TEMPLATE_VARIABLE_CLOSING_TAG,`
			`line,`
			`flags=re.UNICODE)`


			`def strip_variable_template_tags(line: str) -> str:`
			`"""Replaces tags back with ${ and } respectively."""`
			`return re.sub(TEMPLATE_VARIABLE_OPENING_TAG + r'\s(\w+)\s' + TEMPLATE_VARIABLE_CLOSING_TAG,`
			`r'${\1}',`
			`line,`
			`flags=re.UNICODE)`

Remove spurious __pycache__ dir. Format code. 5 years ago
fix curly bracket in quotes 5 years ago			`def apply_bracket_template_tags(content: str) -> str:`
			`""" Replaces bracket { and } with tags, so subsequent formatting is easier."""`
			`result = ""`
			`in_quotes = False`
			`last_c = ""`

			`for c in content:`
			`if (c == "\'" or c == "\"") and last_c != "\\":`
			`in_quotes = reverse_in_quotes_status(in_quotes)`
			`if in_quotes:`
			`if c == "{":`
			`result += TEMPLATE_BRACKET_OPENING_TAG`
			`elif c == "}":`
			`result += TEMPLATE_BRACKET_CLOSING_TAG`
			`else:`
			`result += c`
			`else:`
			`result += c`
			`last_c = c`
			`return result`

Remove spurious __pycache__ dir. Format code. 5 years ago
fix curly bracket in quotes 5 years ago			`def reverse_in_quotes_status(status: bool) -> bool:`
			`if status:`
			`return False`
			`return True`

Remove spurious __pycache__ dir. Format code. 5 years ago
fix curly bracket in quotes 5 years ago			`def strip_bracket_template_tags(content: str) -> str:`
			`""" Replaces tags back with { and } respectively."""`
			`content = content.replace(TEMPLATE_BRACKET_OPENING_TAG, "{", -1)`
			`content = content.replace(TEMPLATE_BRACKET_CLOSING_TAG, "}", -1)`
			`return content`
Add support for variable marks ${ and }. Fixes #4 7 years ago
Remove spurious __pycache__ dir. Format code. 5 years ago
Fix error which split line when it contained backslash. Fixes #5 7 years ago			`def clean_lines(orig_lines) -> list:`
Add function description and some readme. 9 years ago			`"""Strips the lines and splits them if they contain curly brackets."""`
Add working implementation. 9 years ago			`cleaned_lines = []`
			`for line in orig_lines:`
			`line = strip_line(line)`
Add support for variable marks ${ and }. Fixes #4 7 years ago			`line = apply_variable_template_tags(line)`
Add working implementation. 9 years ago			`if line == "":`
			`cleaned_lines.append("")`
			`continue`
			`else:`
Fix error with brackets in comments. 9 years ago			`if line.startswith("#"):`
Add support for variable marks ${ and }. Fixes #4 7 years ago			`cleaned_lines.append(strip_variable_template_tags(line))`
Fix error with brackets in comments. 9 years ago			`else:`
Remove spurious __pycache__ dir. Format code. 5 years ago			`q, c = count_multi_semicolon(line)`
fix the multi_semicolon bug 6 years ago			`if q == 1 and c > 1:`
			`ml = multi_semicolon(line)`
			`cleaned_lines.extend(clean_lines(ml.splitlines()))`
			`elif q != 1 and c > 1:`
fix multiline `;` in one line case and rewrite regex curly bracket case. fix multiline `;` in one line case and rewrite regex curly bracket case. 6 years ago			`newlines = line.split(";")`
			`cleaned_lines.extend(clean_lines(["".join([ln, ";"]) for ln in newlines if ln != ""]))`
			`else:`
			`if line.startswith("rewrite"):`
Remove spurious __pycache__ dir. Format code. 5 years ago			`cleaned_lines.append(strip_variable_template_tags(line))`
fix multiline `;` in one line case and rewrite regex curly bracket case. fix multiline `;` in one line case and rewrite regex curly bracket case. 6 years ago			`else:`
			`cleaned_lines.extend(`
			`[strip_variable_template_tags(l).strip() for l in re.split(r"([{}])", line) if l != ""])`
Add working implementation. 9 years ago			`return cleaned_lines`


Add function description and some readme. 9 years ago			`def join_opening_bracket(lines):`
			`"""When opening curly bracket is in it's own line (K&R convention), it's joined with precluding line (Java)."""`
Add working implementation. 9 years ago			`modified_lines = []`
			`for i in range(len(lines)):`
			`if i > 0 and lines[i] == "{":`
			`modified_lines[-1] += " {"`
			`else:`
			`modified_lines.append(lines[i])`
			`return modified_lines`


			`def perform_indentation(lines):`
Add function description and some readme. 9 years ago			`"""Indents the lines according to their nesting level determined by curly brackets."""`
Add working implementation. 9 years ago			`indented_lines = []`
			`current_indent = 0`
Added uniform spacing of directives and their parameters 5 years ago			`current_line = ""`

Add working implementation. 9 years ago			`for line in lines:`
Fix error with brackets in comments. 9 years ago			`if not line.startswith("#") and line.endswith('}') and current_indent > 0:`
Add working implementation. 9 years ago			`current_indent -= 1`

Add feature to remove excessive newlines. 9 years ago			`if line != "":`
Added uniform spacing of directives and their parameters 5 years ago			`directive = line.split(' ', maxsplit=1)`
			`if directive[0] == '}' or directive[0] == 'if' or directive[0] == 'location':`
			`indented_lines.append(current_indent * INDENTATION + line)`
			`elif directive[1] == '{':`
			`indented_lines.append(current_indent * INDENTATION + line)`
			`else:`
			`directive_split_space = int( ( 40 - (current_indent * 8) - len(directive[0]) ) / 8 ) + 1`
			`indented_lines.append(current_indent * INDENTATION + directive[0] + directive_split_space * INDENTATION + directive[1])`
Add feature to remove excessive newlines. 9 years ago			`else:`
			`indented_lines.append("")`
Add working implementation. 9 years ago
Fix error with brackets in comments. 9 years ago			`if not line.startswith("#") and line.endswith('{'):`
Add working implementation. 9 years ago			`current_indent += 1`

			`return indented_lines`


Add support for latin1 encoding of input files. #2 8 years ago			`def format_config_contents(contents):`
Add function description and some readme. 9 years ago			`"""Accepts the string containing nginx configuration and returns formatted one. Adds newline at the end."""`
fix curly bracket in quotes 5 years ago			`contents = apply_bracket_template_tags(contents)`
Fix error which split line when it contained backslash. Fixes #5 7 years ago			`lines = contents.splitlines()`
			`lines = clean_lines(lines)`
Add function description and some readme. 9 years ago			`lines = join_opening_bracket(lines)`
Add working implementation. 9 years ago			`lines = perform_indentation(lines)`

Add feature to remove excessive newlines. 9 years ago			`text = '\n'.join(lines)`
fix curly bracket in quotes 5 years ago			`text = strip_bracket_template_tags(text)`
Add feature to remove excessive newlines. 9 years ago
			`for pattern, substitute in ((r'\n{3,}', '\n\n\n'), (r'^\n', ''), (r'\n$', '')):`
			`text = re.sub(pattern, substitute, text, re.MULTILINE)`

			`return text + '\n'`
Add working implementation. 9 years ago

Add support for latin1 encoding of input files. #2 8 years ago			`def format_config_file(file_path, original_backup_file_path=None, verbose=True):`
			`"""`
			`Performs the formatting on the given file. The function tries to detect file encoding first.`
			`:param file_path: path to original nginx configuration file. This file will be overridden.`
			`:param original_backup_file_path: optional path, where original file will be backed up.`
			`:param verbose: show messages`
			`"""`
			`encodings = ('utf-8', 'latin1')`

			`encoding_failures = []`
			`chosen_encoding = None`

			`for enc in encodings:`
			`try:`
			`with codecs.open(file_path, 'r', encoding=enc) as rfp:`
			`original_file_content = rfp.read()`
			`chosen_encoding = enc`
			`break`
			`except ValueError as e:`
			`encoding_failures.append(e)`

			`if chosen_encoding is None:`
			`raise Exception('none of encodings %s are valid for file %s. Errors: %s'`
			`% (encodings, file_path, [e.message for e in encoding_failures]))`

			`assert original_file_content is not None`

			`with codecs.open(file_path, 'w', encoding=chosen_encoding) as wfp:`
			`wfp.write(format_config_contents(original_file_content))`

			`if verbose:`
			`print("Formatted file '%s' (detected encoding %s)." % (file_path, chosen_encoding))`

			`if original_backup_file_path:`
			`with codecs.open(original_backup_file_path, 'w', encoding=chosen_encoding) as wfp:`
			`wfp.write(original_file_content)`
			`if verbose:`
			`print("Original saved to '%s'." % original_backup_file_path)`


Add working implementation. 9 years ago			`if __name__ == "__main__":`
Change strip_line to prevent collapsing spaces in comments. 8 years ago			`arg_parser = argparse.ArgumentParser(description=__doc__)`
Add working implementation. 9 years ago
Change strip_line to prevent collapsing spaces in comments. 8 years ago			`arg_parser.add_argument("-v", "--verbose", action="store_true", help="show formatted file names")`
			`arg_parser.add_argument("-b", "--backup-original", action="store_true", help="backup original config file")`
Add support for latin1 encoding of input files. #2 8 years ago			`arg_parser.add_argument("config_files", nargs='+', help="configuration files to format")`
Add working implementation. 9 years ago
Change strip_line to prevent collapsing spaces in comments. 8 years ago			`args = arg_parser.parse_args()`
Add working implementation. 9 years ago
Add support for latin1 encoding of input files. #2 8 years ago			`for config_file_path in args.config_files:`
			`backup_file_path = config_file_path + '~' if args.backup_original else None`
			`format_config_file(config_file_path, backup_file_path, args.verbose)`