From 09f0b8a70090c27a500eff2a1754fccaa26b8e49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20S=C5=82omkowski?=
Date: Fri, 17 Feb 2017 13:50:44 +0100
Subject: [PATCH] Add support for latin1 encoding of input files. #2

---
 .gitignore                    |  2 --
 nginxfmt.py                   | 65 +++++++++++++++++++++++++----------
 test-files/umlaut-latin1.conf | 20 +++++++++++
 test-files/umlaut-utf8.conf   | 20 +++++++++++
 test_nginxfmt.py              | 43 +++++++++++++++++++++--
 5 files changed, 128 insertions(+), 22 deletions(-)
 create mode 100644 test-files/umlaut-latin1.conf
 create mode 100644 test-files/umlaut-utf8.conf

diff --git a/.gitignore b/.gitignore
index be3979b..0c9eabd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,4 @@
 .idea
 *.iml
 
-test-files
-
 
diff --git a/nginxfmt.py b/nginxfmt.py
index 1bce3c5..1e7849e 100755
--- a/nginxfmt.py
+++ b/nginxfmt.py
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
 """Script formats nginx configuration file."""
 
 import argparse
+import codecs
 import re
 
 
@@ -76,7 +78,7 @@ def perform_indentation(lines):
     return indented_lines
 
 
-def format_config_file(contents):
+def format_config_contents(contents):
     """Accepts the string containing nginx configuration and returns formatted one. Adds newline at the end."""
     lines = clean_lines(contents.splitlines())
     lines = join_opening_bracket(lines)
@@ -90,28 +92,55 @@ def format_config_file(contents):
     return text + '\n'
 
 
+def format_config_file(file_path, original_backup_file_path=None, verbose=True):
+    """
+    Formats the given file in place. Candidate encodings are tried in order until one decodes the file.
+    :param file_path: path to original nginx configuration file. This file will be overwritten.
+    :param original_backup_file_path: optional path, where original content is saved before overwriting.
+    :param verbose: show messages
+    :raises Exception: when none of the candidate encodings can decode the file.
+    """
+    encodings = ('utf-8', 'latin1')
+    encoding_failures = []
+    chosen_encoding = None
+
+    for enc in encodings:
+        try:
+            with codecs.open(file_path, 'r', encoding=enc) as rfp:
+                original_file_content = rfp.read()
+            chosen_encoding = enc
+            break
+        except ValueError as e:
+            encoding_failures.append(e)
+
+    if chosen_encoding is None:
+        raise Exception('none of encodings %s are valid for file %s. Errors: %s'
+                        % (encodings, file_path, [str(failure) for failure in encoding_failures]))
+
+    # Format and back up before overwriting: opening with 'w' truncates the original file.
+    formatted_content = format_config_contents(original_file_content)
+    if original_backup_file_path:
+        with codecs.open(original_backup_file_path, 'w', encoding=chosen_encoding) as wfp:
+            wfp.write(original_file_content)
+
+    with codecs.open(file_path, 'w', encoding=chosen_encoding) as wfp:
+        wfp.write(formatted_content)
+
+    if verbose:
+        print("Formatted file '%s' (detected encoding %s)." % (file_path, chosen_encoding))
+        if original_backup_file_path:
+            print("Original saved to '%s'." % original_backup_file_path)
+
+
 if __name__ == "__main__":
     arg_parser = argparse.ArgumentParser(description=__doc__)
 
     arg_parser.add_argument("-v", "--verbose", action="store_true", help="show formatted file names")
     arg_parser.add_argument("-b", "--backup-original", action="store_true", help="backup original config file")
-    arg_parser.add_argument("config_files", type=argparse.FileType('r'), nargs='+',
-                            help="configuration files to format")
+    arg_parser.add_argument("config_files", nargs='+', help="configuration files to format")
 
     args = arg_parser.parse_args()
 
-    for config_file in args.config_files:
-        original_file_content = config_file.read()
-        config_file.close()
-
-        with open(config_file.name, 'w') as fp:
-            fp.write(format_config_file(original_file_content))
-        if args.verbose:
-            print("Formatted file %s" % config_file.name)
-
-        if args.backup_original:
-            backup_file_path = config_file.name + '~'
-            with open(backup_file_path, 'w') as fp:
-                fp.write(original_file_content)
-            if args.verbose:
-                print("Original saved to %s" % backup_file_path)
+    for config_file_path in args.config_files:
+        backup_file_path = config_file_path + '~' if args.backup_original else None
+        format_config_file(config_file_path, backup_file_path, args.verbose)
diff --git a/test-files/umlaut-latin1.conf b/test-files/umlaut-latin1.conf
new file mode 100644
index 0000000..5127d1a
--- /dev/null
+++ b/test-files/umlaut-latin1.conf
@@ -0,0 +1,20 @@
+http {
+    server {
+        listen 80 default_server;
+        server_name example.com;
+
+        # redirect auf https://www....
+        location / {
+            return 301 https://www.example.com$request_uri;
+        }
+
+        # Statusseite für Monitoring freigeben
+        # line above contains german umlaut causing problems
+        location /nginx_status {
+            stub_status on;
+            access_log off;
+            allow 127.0.0.1;
+            deny all;
+        }
+    }
+}
diff --git a/test-files/umlaut-utf8.conf b/test-files/umlaut-utf8.conf
new file mode 100644
index 0000000..3b7fd04
--- /dev/null
+++ b/test-files/umlaut-utf8.conf
@@ -0,0 +1,20 @@
+http {
+    server {
+        listen 80 default_server;
+        server_name example.com;
+
+        # redirect auf https://www....
+        location / {
+            return 301 https://www.example.com$request_uri;
+        }
+
+        # Statusseite für Monitoring freigeben
+        # line above contains german umlaut causing problems
+        location /nginx_status {
+            stub_status on;
+            access_log off;
+            allow 127.0.0.1;
+            deny all;
+        }
+    }
+}
diff --git a/test_nginxfmt.py b/test_nginxfmt.py
index 14f8c21..3c8da8e 100644
--- a/test_nginxfmt.py
+++ b/test_nginxfmt.py
@@ -1,5 +1,10 @@
-"""Unit tests for nginxfmt module."""
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
+"""Unit tests for nginxfmt module."""
+import os
+import shutil
+import tempfile
 import unittest
 
 from nginxfmt import *
@@ -10,7 +15,7 @@ __license__ = "Apache 2.0"
 
 class TestFormatter(unittest.TestCase):
     def _check_formatting(self, original_text, formatted_text):
-        self.assertEqual(formatted_text, format_config_file(original_text))
+        self.assertMultiLineEqual(formatted_text, format_config_contents(original_text))
 
     def test_join_opening_parenthesis(self):
         self.assertEqual(["foo", "bar {", "johan {", "tee", "ka", "}"],
@@ -70,6 +75,26 @@ class TestFormatter(unittest.TestCase):
         self.assertEqual('lorem ipsum " foo bar zip " or " dd aa " mi',
                          strip_line(' lorem ipsum " foo bar zip " or \t " dd aa " mi'))
 
+    def test_umlaut_in_string(self):
+        self._check_formatting(
+            "# Statusseite für Monitoring freigeben \n" +
+            "# line above contains german umlaut causing problems \n" +
+            "location /nginx_status {\n" +
+            " stub_status on;\n" +
+            " access_log off;\n" +
+            " allow 127.0.0.1;\n" +
+            " deny all;\n" +
+            "}",
+            "# Statusseite für Monitoring freigeben\n" +
+            "# line above contains german umlaut causing problems\n" +
+            "location /nginx_status {\n" +
+            " stub_status on;\n" +
+            " access_log off;\n" +
+            " allow 127.0.0.1;\n" +
+            " deny all;\n" +
+            "}\n"
+        )
+
     def test_empty_lines_removal(self):
         self._check_formatting(
             "\n foo bar {\n" +
@@ -100,6 +125,20 @@ class TestFormatter(unittest.TestCase):
             " }\n" +
             "}\n")
 
+    def test_loading_utf8_file(self):
+        tmp_fd, tmp_file = tempfile.mkstemp('utf-8')
+        os.close(tmp_fd)  # mkstemp returns an already-open descriptor; close it so it is not leaked
+        shutil.copy('test-files/umlaut-utf8.conf', tmp_file)
+        format_config_file(tmp_file, verbose=True)
+        os.unlink(tmp_file)  # todo perform some tests on result file before removing it
+
+    def test_loading_latin1_file(self):
+        tmp_fd, tmp_file = tempfile.mkstemp('latin1')
+        os.close(tmp_fd)  # mkstemp returns an already-open descriptor; close it so it is not leaked
+        shutil.copy('test-files/umlaut-latin1.conf', tmp_file)
+        format_config_file(tmp_file, verbose=True)
+        os.unlink(tmp_file)  # todo perform some tests on result file before removing it
+
 
 if __name__ == '__main__':
     unittest.main()