Add script to detect similarities in translation strings

stable
Jesús Espino 2017-03-03 12:48:33 +01:00
parent 0fa7d8d630
commit 683dd2300a
1 changed files with 62 additions and 0 deletions

View File

@ -0,0 +1,62 @@
import json
import os
import click
from difflib import SequenceMatcher
# Repository root: the parent of the directory that contains this script.
# abspath() keeps this correct when the script is launched through a relative
# path (e.g. ``python script.py`` from inside its own directory); without it
# dirname(dirname(__file__)) collapses to "" and the locale path below would
# be resolved against the current working directory instead.
ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Locale file whose strings are loaded and compared by verify_similarity().
DEFAULT_LOCALE_PATH = os.path.join(ROOT_PATH, "app/locales/taiga/locale-en.json")
def keywords(key, value):
    """Flatten a nested mapping into a list of ``(dotted_key, value)`` pairs.

    Args:
        key: List of path segments leading to ``value``, or ``None`` when
            ``value`` is the root of the tree.
        value: A dict to descend into, or a leaf value of any other type.

    Returns:
        A list of ``(path, leaf)`` tuples, where ``path`` is the path
        segments joined with ``"."``. Dict entries are visited in the
        dict's iteration order. A root that is not a dict has no key to
        report, so it yields an empty list (this case used to fall through
        every branch and return ``None``).
    """
    path = [] if key is None else key

    if not isinstance(value, dict):
        if key is None:
            # Non-dict root: nothing can be named, but still return a list
            # so callers can iterate the result unconditionally.
            return []
        return [(".".join(path), value)]

    kws = []
    for item_key, item_value in value.items():
        kws.extend(keywords(path + [item_key], item_value))
    return kws
def _similar_pairs(all_keywords, threshold, min_length, omit_identical):
    """Yield ``(key1, key2, similarity, value1, value2)`` for each reportable pair."""
    for index, (key1, value1) in enumerate(all_keywords):
        # Only pair an entry with the entries after it, so every unordered
        # pair is scored exactly once (in the same order it would first be
        # reached by a full nested scan) and no "already shown" bookkeeping
        # is needed.
        for key2, value2 in all_keywords[index + 1:]:
            if key1 == key2:
                continue
            # A pair is skipped only when BOTH strings are shorter than
            # min_length; one long string is enough to keep it.
            if len(value1) < min_length and len(value2) < min_length:
                continue
            similarity = SequenceMatcher(None, value1, value2).ratio()
            if omit_identical and similarity == 1.0:
                continue
            if similarity >= threshold:
                yield key1, key2, similarity, value1, value2


@click.command()
@click.option('--threshold', default=1.0, help='Minimun similarity to show')
@click.option('--min-length', default=10, help='Minimun size of the string to show')
@click.option('--omit-identical', default=False, is_flag=True, help='Omit identical strings')
def verify_similarity(threshold, min_length, omit_identical):
    """Print pairs of strings from the default locale file that look alike."""
    # Close the file deterministically and decode it as UTF-8 rather than
    # with the platform's default encoding.
    with open(DEFAULT_LOCALE_PATH, encoding="utf-8") as locale_file:
        locales = json.load(locale_file)

    # Guard against a falsy result (e.g. a locale file whose root is not an
    # object) so the scan below always receives a list.
    all_keywords = keywords(None, locales) or []

    pairs = _similar_pairs(all_keywords, threshold, min_length, omit_identical)
    for key1, key2, similarity, value1, value2 in pairs:
        click.echo(
            "The keys {} and {} has a similarity of {}\n - {}\n - {}".format(
                key1,
                key2,
                similarity,
                value1,
                value2
            )
        )
# Run the command only when this file is executed as a script, not on import.
if __name__ == "__main__":
    verify_similarity()