"""Report Sphinx/reST inline markup that differs between msgid and msgstr.

Usage::

    python scripts/format_differences.py [FILE_OR_DIRECTORY]

With a ``.po`` file, only that file is checked.  With a directory, the
``*.po`` files directly inside it are checked.  With no argument, every
``.po`` file below the repository root (the parent of this script's
directory) is checked.  Files inside the ``venv`` directory of the
repository root are always skipped.
"""
import collections
import re
import sys
from pathlib import Path
from pprint import pprint
from typing import List

# Order matters: get_sphinx_directives() removes each match before trying the
# next pattern, so prefixed roles (":c:func:") must come before their shorter
# counterparts (":func:"), "``x``" before "`x`__" / "`x`_", and bold before
# italic.
_patterns = [
    ":c:func:`[^`]+`",
    ":c:type:`[^`]+`",
    ":c:macro:`[^`]+`",
    ":c:member:`[^`]+`",
    ":c:data:`[^`]+`",
    ":py:data:`[^`]+`",
    ":py:mod:`[^`]+`",
    ":func:`[^`]+`",
    ":mod:`[^`]+`",
    ":ref:`[^`]+`",
    ":class:`[^`]+`",
    ":pep:`[^`]+`",
    ":data:`[^`]+`",
    ":exc:`[^`]+`",
    ":term:`[^`]+`",
    ":meth:`[^`]+`",
    ":envvar:`[^`]+`",
    ":file:`[^`]+`",
    ":attr:`[^`]+`",
    ":const:`[^`]+`",
    ":issue:`[^`]+`",
    ":opcode:`[^`]+`",
    ":option:`[^`]+`",
    ":program:`[^`]+`",
    ":keyword:`[^`]+`",
    ":RFC:`[^`]+`",
    ":rfc:`[^`]+`",
    ":doc:`[^`]+`",
    "``[^`]+``",
    "`[^`]+`__",
    "`[^`]+`_",
    r"\*\*[^\*]+\*\*",  # bold text between **
    r"\*[^\*]+\*",  # italic text between *
]

_exps = [re.compile(e) for e in _patterns]


def get_sphinx_directives(s: str) -> List[str]:
    """Extract the Sphinx/reST inline markup found in *s*.

    The patterns in ``_patterns`` are tried in order.  Every match is removed
    from the working copy of the text before the next pattern is tried, so a
    span already captured by one pattern (e.g. ``:c:func:`` or ``**bold**``)
    is not captured again by a later, shorter one (``:func:``, ``*italic*``).

    Parameters:
        s: text to scan, typically a ``msgid`` or ``msgstr``.

    Returns:
        List with every matched markup string.  Items are grouped by pattern
        order, not by their position in the text; repeated occurrences are
        kept.
    """
    output: List[str] = []
    for exp in _exps:
        matches = exp.findall(s)
        output.extend(matches)
        # remove the found pattern from the original string
        for match in matches:
            s = s.replace(match, "")
    return output


def ind(level=0):
    """Return the indentation prefix for *level* (four spaces per level)."""
    return " " * 4 * level


def _collect_po_files(args: List[str], po_dir: Path) -> List[Path]:
    """Return the .po files selected by the command-line arguments.

    Parameters:
        args: command-line arguments without the program name; only the
            first one is used.
        po_dir: repository root, used for the "scan everything" case and to
            locate the ``venv`` directory that must be skipped.

    Returns:
        Paths of the files to check.

    Raises:
        FileNotFoundError: the argument is neither a directory nor a file.
    """
    venv_dir = po_dir / "venv"
    target = args[0] if args else ""

    if not target:
        # No (or empty) argument: scan the whole repository.
        return [p for p in po_dir.glob("**/*.po") if not p.is_relative_to(venv_dir)]

    path = Path(target)
    if path.is_dir():
        # Glob the directory itself (resolved to an absolute path) so that
        # absolute arguments and a working directory other than the
        # repository root both work.
        return [
            p for p in path.resolve().glob("*.po") if not p.is_relative_to(venv_dir)
        ]
    if not path.is_file():
        raise FileNotFoundError(target)
    return [path]


def _report_entry(pofilename, entry) -> None:
    """Print every markup pair that differs between msgid and msgstr.

    Parameters:
        pofilename: name of the file being processed (only used for output).
        entry: object exposing ``msgid``, ``msgstr``, ``occurrences`` (an
            iterable of ``(file, line)`` pairs) and ``linenum``.
    """
    directives_id = get_sphinx_directives(entry.msgid)
    directives_str = get_sphinx_directives(entry.msgstr)

    # NOTE(review): zip() stops at the shorter list, so markup that is
    # missing from (or extra in) the translation is not reported; only
    # position-wise differences are.
    for ori, dst in zip(directives_id, directives_str):
        if ori == dst:
            continue

        occs = [f"{ind(2)}{t[0]}:{t[1]}" for t in entry.occurrences]
        print(f"\n{ind(1)}{pofilename}:{entry.linenum}")
        print("\n".join(occs))
        print(f"{ind(3)}{ori}")
        print(f"{ind(3)}{dst}")


def main(argv=None):
    """Check the selected .po files and print the markup differences.

    Parameters:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Exits with status -1 when the given path does not exist.
    """
    # Third-party dependency, imported here so the helpers above remain
    # importable when polib is not installed.
    import polib

    args = sys.argv[1:] if argv is None else list(argv)
    po_dir = Path(__file__).resolve().parent.parent

    try:
        files = _collect_po_files(args, po_dir)
    except FileNotFoundError:
        print(f"File not found: '{args[0]}'")
        sys.exit(-1)

    for pofilename in files:
        print(f"\n> Processing {pofilename}")
        po = polib.pofile(pofilename)
        for entry in po:
            _report_entry(pofilename, entry)


if __name__ == "__main__":
    main()