#!/usr/bin/env python3
"""Report pairs of translation keys whose strings are similar.

The default English locale file is flattened into dotted keys and every
pair of string values is compared with ``difflib.SequenceMatcher``.
"""
import json
import os
from difflib import SequenceMatcher

import click

# Resolve against the absolute script location so the locale file is found
# regardless of the directory the script is launched from.
ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DEFAULT_LOCALE_PATH = os.path.join(ROOT_PATH, "app/locales/taiga/locale-en.json")


def keywords(key, value):
    """Flatten a nested mapping into a list of ``(dotted_key, leaf)`` pairs.

    Args:
        key: List of path components leading to ``value``, or ``None`` when
            ``value`` is the root of the tree.
        value: A dict to descend into, or a leaf value of any other type.

    Returns:
        A list of ``(".".join(path), leaf)`` tuples in dict iteration order.
        A non-dict root (``key is None``) has no name to report and yields
        an empty list.
    """
    if not isinstance(value, dict):
        # The original fell through and returned None for an unnamed leaf,
        # which made callers fail when iterating over the result.
        return [] if key is None else [(".".join(key), value)]

    path = [] if key is None else key
    kws = []
    for item_key, item_value in value.items():
        kws += keywords(path + [item_key], item_value)
    return kws


def _similar_pairs(all_keywords, threshold, min_length, omit_identical):
    """Yield ``(key1, key2, similarity, value1, value2)`` for similar strings.

    Each unordered pair of keys is yielded at most once. A pair is skipped
    only when *both* strings are shorter than ``min_length``.
    """
    # len() and string matching are only meaningful for text leaves;
    # numbers, booleans and null would otherwise raise a TypeError.
    candidates = [(k, v) for k, v in all_keywords if isinstance(v, str)]
    reported = set()

    # Both argument orders are evaluated on purpose: SequenceMatcher.ratio()
    # may depend on the order of its arguments.
    for key1, value1 in candidates:
        for key2, value2 in candidates:
            if key1 == key2:
                continue
            if len(value1) < min_length and len(value2) < min_length:
                continue

            pair = frozenset((key1, key2))
            if pair in reported:
                # Already reported: no need to pay for another ratio().
                continue

            matcher = SequenceMatcher(None, value1, value2)
            # real_quick_ratio() and quick_ratio() are cheap upper bounds on
            # ratio(), so a bound below the threshold rules the pair out.
            if matcher.real_quick_ratio() < threshold:
                continue
            if matcher.quick_ratio() < threshold:
                continue

            similarity = matcher.ratio()
            if omit_identical and similarity == 1.0:
                continue

            if similarity >= threshold:
                reported.add(pair)
                yield key1, key2, similarity, value1, value2


@click.command()
@click.option('--threshold', default=1.0, help='Minimum similarity to show')
@click.option('--min-length', default=10, help='Minimum size of the string to show')
@click.option('--omit-identical', default=False, is_flag=True, help='Omit identical strings')
def verify_similarity(threshold, min_length, omit_identical):
    """Print the pairs of locale keys whose strings are similar."""
    # Context manager closes the file; JSON text is read as UTF-8.
    with open(DEFAULT_LOCALE_PATH, encoding="utf-8") as locale_file:
        locales = json.load(locale_file)

    all_keywords = keywords(None, locales)
    found = _similar_pairs(all_keywords, threshold, min_length, omit_identical)
    for key1, key2, similarity, value1, value2 in found:
        click.echo(
            "The keys {} and {} has a similarity of {}\n - {}\n - {}".format(
                key1,
                key2,
                similarity,
                value1,
                value2
            )
        )


if __name__ == "__main__":
    verify_similarity()