From be941fccca0fa90a90de79722acddc30dec82bde Mon Sep 17 00:00:00 2001 From: Herwin Date: Tue, 30 Jan 2024 08:25:13 +0100 Subject: [PATCH] Fixed UTF-8 encoding issue in content index files Update content_index_maker.py --- content_index_maker.py | 6 ++++-- content_item_processor.py | 0 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 content_item_processor.py diff --git a/content_index_maker.py b/content_index_maker.py index f5f8247..53b0aa8 100644 --- a/content_index_maker.py +++ b/content_index_maker.py @@ -60,7 +60,9 @@ for index_entry_filename in os.listdir(input_folder): print("> Processing '{}'".format(index_entry_filename)) - with open(os.path.join(input_folder, index_entry_filename), 'r') as f: + # Loading the YAML data + # See: https://github.com/yaml/pyyaml/issues/123#issuecomment-395431735 + with open(os.path.join(input_folder, index_entry_filename), 'rt', encoding='utf8') as f: index_entry_data = yaml.safe_load(f) # Checking some stuff @@ -97,4 +99,4 @@ except IOError: pass with open(output_file, "wb") as f: - f.write(json.dumps(index_data, separators=(',', ':')).encode("utf-8")) + f.write(json.dumps(index_data, separators=(',', ':'), ensure_ascii=False).encode("utf-8")) diff --git a/content_item_processor.py b/content_item_processor.py new file mode 100644 index 0000000..e69de29