dominiontabs/tools/update_language.py
Peter 31a71447f0
precommit git hooks, rearrange package structure (#251)
* precommit hooks and changes because of these
* mv package into src dir
* add requirements file, clean up setup reqs, add precommit to CI
2019-05-23 16:17:30 -07:00

479 lines
19 KiB
Python

###########################################################################
# This file provides maintenance on the various language files
# 1. Create new "xx/cards_xx.json" files that have entries ordered as:
# a. the card_tag entries in "cards_db.json"
# b. the group_tag entries as found in "cards_db.json"
# c. the super group entries (grouping across all expansions)
# d. any unused entries existing in the file (assumed to be work in progress)
#
# 2. Create new "sets_db.json" and "xx/sets_xx.json" with entries sorted alphabetically
#
# All output is in the designated output directory. Original files are not overwritten.
###########################################################################
from __future__ import print_function
import os
import os.path
import io
import codecs
import json
from shutil import copyfile
import argparse
import six
LANGUAGE_DEFAULT = "en_us" # default language, which takes priority
LANGUAGE_XX = "xx" # language for starting a translation
def get_lang_dirs(path):
    """Return the names of all valid language directories under ``path``.

    A sub-directory ``name`` counts as a language only when it contains both
    ``cards_<name>.json`` and ``sets_<name>.json``.

    Args:
        path: directory holding one sub-directory per language.

    Returns:
        A new list of language directory names, in sorted order.
    """
    languages = []
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # processing order (and therefore the generated output) reproducible.
    for name in sorted(os.listdir(path)):
        dir_path = os.path.join(path, name)
        if not os.path.isdir(dir_path):
            continue
        cards_file = os.path.join(dir_path, "cards_" + name + ".json")
        sets_file = os.path.join(dir_path, "sets_" + name + ".json")
        if os.path.isfile(cards_file) and os.path.isfile(sets_file):
            languages.append(name)
    return languages
def get_json_data(json_file_path):
    """Read ``json_file_path`` as UTF-8 and return the parsed JSON value.

    A "reading <path>" line is printed before the file is opened.  An
    AssertionError is raised when the parsed value is falsy (for example an
    empty object or an empty list).
    """
    print("reading {}".format(json_file_path))
    with codecs.open(json_file_path, "r", "utf-8") as handle:
        parsed = json.loads(handle.read())
    assert parsed, "Could not load json at: '%r' " % json_file_path
    return parsed
def json_dict_entry(entry, separator=""):
    """Format ``entry`` as the inner text of a pretty-printed JSON object.

    The entry is dumped with a 4-space indent, sorted keys and non-ASCII
    characters left unescaped.  The enclosing ``{}`` and any trailing white
    space are removed, and ``separator`` is placed in front of the result.
    """
    rendered = json.dumps(entry, indent=4, ensure_ascii=False, sort_keys=True)
    # Drop the outer braces first, then the newline left before the closing one.
    inner = rendered.strip("{}")
    inner = inner.rstrip()
    return separator + inner
# Sort on several keys at once.
# see: http://stackoverflow.com/questions/1143671/python-sorting-list-of-dictionaries-by-multiple-keys
def multikeysort(items, columns):
    """Return ``items`` ordered by each key in ``columns``, first key most significant.

    The sort is applied once per column, starting with the last one; because
    ``sorted`` is stable, earlier columns end up taking priority.  When
    ``columns`` is empty the original ``items`` object is returned unchanged.
    """
    from operator import itemgetter

    ordered = items
    for column in reversed(columns):
        ordered = sorted(ordered, key=itemgetter(column))
    return ordered
def main(args):
    """Regenerate the card database and language files into ``args.output_dir``.

    Reads ``types_db.json``, ``labels_db.json``, ``cards_db.json`` and
    ``sets_db.json`` from ``args.card_db_dir`` and rewrites each of them to the
    output directory.  For every language it then writes ``types_<lang>.json``,
    ``cards_<lang>.json`` and ``sets_<lang>.json``, filling entries that are
    missing from the default language's data, and finally copies the bonuses
    files and the translation documents.

    The default language is always processed first because the other
    languages take their fallback text from it.

    Args:
        args: object with ``card_db_dir`` and ``output_dir`` attributes (the
            namespace produced by the argument parser in the ``__main__`` block).
    """
    ###########################################################################
    # Get all the languages, and place the default language first in the list
    ###########################################################################
    languages = get_lang_dirs(args.card_db_dir)
    # NOTE: list.remove() raises ValueError when the default language
    # directory was not returned by get_lang_dirs().
    languages.remove(LANGUAGE_DEFAULT)
    languages.insert(0, LANGUAGE_DEFAULT)
    # The "xx" template language is always generated, even without a source directory.
    if LANGUAGE_XX not in languages:
        languages.append(LANGUAGE_XX)
    print("Languages:")
    print(languages)
    print()

    ###########################################################################
    # Make sure the directories exist to hold the output
    ###########################################################################
    # main output directory
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # each language directory
    for lang in languages:
        # Make sure the directory is there to hold the file
        lang_dir = os.path.join(args.output_dir, lang)
        if not os.path.exists(lang_dir):
            os.makedirs(lang_dir)

    ###########################################################################
    # Get the types_db information
    # Rewrite "types_db.json" sorted by card_type and collect every
    # individual type name found in the card_type lists into type_parts.
    ###########################################################################
    type_parts = []
    # Get the type data
    type_data = get_json_data(os.path.join(args.card_db_dir, "types_db.json"))
    # Sort the type entries by their card_type value
    sorted_type_data = multikeysort(type_data, ["card_type"])
    with io.open(
        os.path.join(args.output_dir, "types_db.json"), "w", encoding="utf-8"
    ) as lang_out:
        lang_out.write(six.u("["))  # Start of list
        sep = ""  # empty before the first element, "," before every later one
        # NOTE: the loop variable `type` shadows the builtin within this function.
        for type in sorted_type_data:
            # Collect all the individual types
            type_parts = list(set(type["card_type"]) | set(type_parts))
            lang_out.write(
                sep + json.dumps(type, indent=4, ensure_ascii=False, sort_keys=True)
            )
            sep = ","
        lang_out.write(six.u("\n]\n"))  # End of List
    type_parts.sort()
    print("Unique Types:")
    print(type_parts)
    print()

    ###########################################################################
    # Get the labels_db information
    # Rewrite "labels_db.json" in the order found and collect every label name.
    ###########################################################################
    all_labels = []
    # Get the label data
    label_data = get_json_data(os.path.join(args.card_db_dir, "labels_db.json"))
    with io.open(
        os.path.join(args.output_dir, "labels_db.json"), "w", encoding="utf-8"
    ) as lang_out:
        lang_out.write(six.u("["))  # Start of list
        sep = ""
        for label in label_data:
            # Collect all the individual label names
            all_labels = list(set(label["names"]) | set(all_labels))
            lang_out.write(
                sep + json.dumps(label, indent=4, ensure_ascii=False, sort_keys=True)
            )
            sep = ","
        lang_out.write(six.u("\n]\n"))  # End of List
    all_labels.sort()
    print("Labels: ")
    print(all_labels)
    print()

    ###########################################################################
    # Fix up all the xx/types_xx.json files
    # Place entries in alphabetical order
    # If entries don't exist:
    #    If the default language, set from information in the "types_db.json" file,
    #    If not the default language, set based on information from the default language.
    # Lastly, keep any extra entries that are not currently used, just in case needed
    # in the future or is a work in progress.
    ###########################################################################
    for lang in languages:
        lang_file = "types_" + lang + ".json"
        fname = os.path.join(args.card_db_dir, lang, lang_file)
        # A missing source file is treated as an empty translation table.
        if os.path.isfile(fname):
            lang_type_data = get_json_data(fname)
        else:
            lang_type_data = {}
        with io.open(
            os.path.join(args.output_dir, lang, lang_file), "w", encoding="utf-8"
        ) as lang_out:
            lang_out.write(six.u("{"))  # Start of types
            sep = ""
            used = []  # type names already written for this language
            for type in sorted(type_parts):
                if type not in lang_type_data:
                    if lang == LANGUAGE_DEFAULT:
                        # Default language: the type name doubles as its own text.
                        lang_type_data[type] = type
                        lang_type_default = lang_type_data
                    else:
                        # Other languages fall back to the default language's text.
                        lang_type_data[type] = lang_type_default[type]
                lang_out.write(json_dict_entry({type: lang_type_data[type]}, sep))
                used.append(type)
                sep = ","
            # Now keep any unused values just in case needed in the future
            for key in lang_type_data:
                if key not in used:
                    lang_out.write(json_dict_entry({key: lang_type_data[key]}, sep))
                    sep = ","
            lang_out.write(six.u("\n}\n"))  # End of Types
        if lang == LANGUAGE_DEFAULT:
            lang_type_default = lang_type_data  # Keep for later languages

    ###########################################################################
    # Get the cards_db information
    # Store in a list in the order found in cards[]. Ordered as follows:
    # 1. card_tags, 2. group_tags, 3. super groups
    ###########################################################################
    cards = []
    groups = []
    super_groups = [u"events", u"landmarks"]
    # Get the card data
    card_data = get_json_data(os.path.join(args.card_db_dir, "cards_db.json"))
    # Sort the cardset_tags
    for card in card_data:
        card["cardset_tags"].sort()
        # But put all the base cards together by moving to front of the list
        if "base" in card["cardset_tags"]:
            card["cardset_tags"].remove("base")
            card["cardset_tags"].insert(0, "base")
    # Sort the cards by cardset_tags, then card_tag
    sorted_card_data = multikeysort(card_data, ["cardset_tags", "card_tag"])
    with io.open(
        os.path.join(args.output_dir, "cards_db.json"), "w", encoding="utf-8"
    ) as lang_out:
        lang_out.write(six.u("["))  # Start of list
        sep = ""
        for card in sorted_card_data:
            # Remember each card_tag / group_tag once, in first-seen order.
            if card["card_tag"] not in cards:
                cards.append(card["card_tag"])
            if "group_tag" in card:
                if card["group_tag"] not in groups:
                    groups.append(card["group_tag"])
            lang_out.write(
                sep + json.dumps(card, indent=4, ensure_ascii=False, sort_keys=True)
            )
            sep = ","
        lang_out.write(six.u("\n]\n"))  # End of List
    # From here on `cards` holds card tags, then group tags, then super groups.
    cards.extend(groups)
    cards.extend(super_groups)
    print("Cards:")
    print(cards)
    print()

    ###########################################################################
    # Fix up all the cards_xx.json files
    # Place entries in the same order as given in "cards_db.json".
    # If entries don't exist:
    #    If the default language, set based on information in the "cards_db.json" file,
    #    If not the default language, set based on information from the default language.
    # Lastly, keep any extra entries that are not currently used, just in case needed
    # in the future or is a work in progress.
    ###########################################################################
    for lang in languages:
        # construct the cards json file name
        lang_file = "cards_" + lang + ".json"
        fname = os.path.join(args.card_db_dir, lang, lang_file)
        if os.path.isfile(fname):
            lang_data = get_json_data(fname)
        else:
            lang_data = {}
        # Process the file
        with io.open(
            os.path.join(args.output_dir, lang, lang_file), "w", encoding="utf-8"
        ) as lang_out:
            lang_out.write(six.u("{"))  # Start of set
            sep = ""
            fields = [u"description", u"extra", u"name"]
            for card in cards:
                # The "xx" language is always reseeded, even for cards it already has.
                if card not in lang_data or lang == LANGUAGE_XX:
                    # Card is missing, need to add it
                    lang_data[card] = {}
                    if lang == LANGUAGE_DEFAULT:
                        # Default language gets bare minimum. Really need to add by hand.
                        lang_data[card]["extra"] = ""
                        lang_data[card]["name"] = card
                        lang_data[card]["description"] = ""
                        lang_data[card]["untranslated"] = ", ".join(fields)
                        lang_default = lang_data
                    else:
                        # All other languages should get the default language's text
                        lang_data[card]["extra"] = lang_default[card]["extra"]
                        lang_data[card]["name"] = lang_default[card]["name"]
                        lang_data[card]["description"] = lang_default[card][
                            "description"
                        ]
                        lang_data[card]["untranslated"] = ", ".join(fields)
                else:
                    # Card exists, figure out what needs updating (don't update default language)
                    if lang != LANGUAGE_DEFAULT:
                        if "untranslated" in lang_data[card]:
                            # Has an 'untranslated' field. Process accordingly
                            if not lang_data[card]["untranslated"].strip():
                                # It is empty, so just remove it
                                del lang_data[card]["untranslated"]
                            else:
                                # If a field remains untranslated, then replace with the default language's copy
                                # (the check is a substring test against the 'untranslated' text)
                                for field in fields:
                                    if field in lang_data[card]["untranslated"]:
                                        lang_data[card][field] = lang_default[card][
                                            field
                                        ]
                        else:
                            # Need to create the 'untranslated' field and update based upon existing fields
                            untranslated = []
                            for field in fields:
                                if field not in lang_data[card]:
                                    lang_data[card][field] = lang_default[card][field]
                                    untranslated.append(field)
                            if untranslated:
                                # only add if something is still needing translation
                                lang_data[card]["untranslated"] = ", ".join(
                                    untranslated
                                )
                lang_out.write(json_dict_entry({card: lang_data[card]}, sep))
                # Marked only after the entry was serialized, so "used" is not
                # part of the text written for this card.
                lang_data[card]["used"] = True
                sep = ","
            # Now keep any unused values just in case needed in the future
            for key in lang_data:
                if "used" not in lang_data[key]:
                    lang_data[key][
                        "untranslated"
                    ] = "Note: This card is currently not used."
                    lang_out.write(json_dict_entry({key: lang_data[key]}, sep))
                    sep = ","
            lang_out.write(six.u("\n}\n"))  # End of Set
        if lang == LANGUAGE_DEFAULT:
            lang_default = lang_data  # Keep for later languages

    ###########################################################################
    # Fix up the sets_db.json file
    # Place entries in alphabetical order
    ###########################################################################
    lang_file = "sets_db.json"
    set_data = get_json_data(os.path.join(args.card_db_dir, lang_file))
    with io.open(
        os.path.join(args.output_dir, lang_file), "w", encoding="utf-8"
    ) as lang_out:
        lang_out.write(six.u("{"))  # Start of set
        sep = ""
        sets = []
        for s in sorted(set_data):
            lang_out.write(json_dict_entry({s: set_data[s]}, sep))
            sep = ","
            if s not in sets:
                sets.append(s)
        lang_out.write(six.u("\n}\n"))  # End of Set
    print("Sets:")
    print(sets)
    print()

    ###########################################################################
    # Fix up all the xx/sets_xx.json files
    # Place entries in alphabetical order
    # If entries don't exist:
    #    If the default language, set from information in the "sets_db.json" file,
    #    If not the default language, set based on information from the default language.
    # Lastly, keep any extra entries that are not currently used, just in case needed
    # in the future or is a work in progress.
    #
    # NOTE: `lang_default` still refers to the default language's *cards* data
    # when this loop starts; it is rebound to the default language's sets data
    # at the end of the first (default language) iteration, before any other
    # language reads it.
    ###########################################################################
    for lang in languages:
        lang_file = "sets_" + lang + ".json"
        fname = os.path.join(args.card_db_dir, lang, lang_file)
        if os.path.isfile(fname):
            lang_set_data = get_json_data(fname)
        else:
            lang_set_data = {}
        with io.open(
            os.path.join(args.output_dir, lang, lang_file), "w", encoding="utf-8"
        ) as lang_out:
            lang_out.write(six.u("{"))  # Start of set
            sep = ""
            for s in sorted(set_data):
                if s not in lang_set_data:
                    lang_set_data[s] = {}
                    if lang == LANGUAGE_DEFAULT:
                        # Seed the default language from "sets_db.json";
                        # the set name is the title-cased set tag.
                        lang_set_data[s]["set_name"] = s.title()
                        lang_set_data[s]["text_icon"] = set_data[s]["text_icon"]
                        if "short_name" in set_data[s]:
                            lang_set_data[s]["short_name"] = set_data[s]["short_name"]
                        if "set_text" in set_data[s]:
                            lang_set_data[s]["set_text"] = set_data[s]["set_text"]
                    else:
                        # Seed other languages from the default language's entry.
                        lang_set_data[s]["set_name"] = lang_default[s]["set_name"]
                        lang_set_data[s]["text_icon"] = lang_default[s]["text_icon"]
                        if "short_name" in lang_default[s]:
                            lang_set_data[s]["short_name"] = lang_default[s][
                                "short_name"
                            ]
                        if "set_text" in lang_default[s]:
                            lang_set_data[s]["set_text"] = lang_default[s]["set_text"]
                else:
                    if lang != LANGUAGE_DEFAULT:
                        # Fill in keys the translation lacks; "used" is the
                        # bookkeeping marker added below and is never copied.
                        for x in lang_default[s]:
                            if x not in lang_set_data[s] and x != "used":
                                lang_set_data[s][x] = lang_default[s][x]
                lang_out.write(json_dict_entry({s: lang_set_data[s]}, sep))
                # Marked after serialization so "used" is not written for this set.
                lang_set_data[s]["used"] = True
                sep = ","
            # Now keep any unused values just in case needed in the future
            for key in lang_set_data:
                if "used" not in lang_set_data[key]:
                    lang_out.write(json_dict_entry({key: lang_set_data[key]}, sep))
                    sep = ","
            lang_out.write(six.u("\n}\n"))  # End of Set
        if lang == LANGUAGE_DEFAULT:
            lang_default = lang_set_data  # Keep for later languages

    ###########################################################################
    # bonuses_xx files
    # Copied unchanged; copyfile() fails if the source file does not exist.
    ###########################################################################
    for lang in languages:
        # Special case for xx. Reseed from default language
        fromLanguage = lang
        if lang == LANGUAGE_XX:
            fromLanguage = LANGUAGE_DEFAULT
        copyfile(
            os.path.join(
                args.card_db_dir, fromLanguage, "bonuses_" + fromLanguage + ".json"
            ),
            os.path.join(args.output_dir, lang, "bonuses_" + lang + ".json"),
        )

    ###########################################################################
    # translation documents
    ###########################################################################
    copyfile(
        os.path.join(args.card_db_dir, "translation.md"),
        os.path.join(args.output_dir, "translation.md"),
    )
    # Since xx is the starting point for new translations,
    # make sure xx has a copy of translation.txt
    # NOTE(review): the source is the xx directory's own "translation.txt",
    # while the file copied just above is "translation.md" -- confirm which
    # file is intended here.  This copy also requires the xx source directory
    # to exist.
    copyfile(
        os.path.join(args.card_db_dir, LANGUAGE_XX, "translation.txt"),
        os.path.join(args.output_dir, LANGUAGE_XX, "translation.txt"),
    )
if __name__ == "__main__":
    # Both default locations are resolved relative to this script's directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_card_db_dir = os.path.join(script_dir, "..", "src", "domdiv", "card_db")
    default_output_dir = os.path.join(script_dir, ".", "card_db")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--card_db_dir", default=default_card_db_dir, help="directory of card data"
    )
    parser.add_argument(
        "--output_dir", default=default_output_dir, help="directory for output data"
    )

    args = parser.parse_args()
    main(args)