[PATCH] tools/docs/checktransupdate.py: use metadata to lookup origin path

From: Haoyang LIU

Date: Mon Mar 09 2026 - 06:01:07 EST


The get_origin_path() function assumes that a translation file has the
same relative path as its origin file, just with "translations/{locale}"
inserted after "Documentation/". This assumption does not hold for
translation files whose origin lives at a different relative path.
For example:

    translations/zh_CN/dev-tools/gdb-kernel-debugging.rst
    -> process/debugging/gdb-kernel-debugging.rst


The correct origin path is specified in each translation file's
:Original: metadata field, which can appear in several formats:

  1. Plain path:        :Original: Documentation/path/to/file.rst
  2. With a :ref: role: :Original: :ref:`Documentation/path/to/file.rst <label>`
  3. With a :doc: role: :Original: :doc:`../../../path/to/file`


Add get_origin_path_from_metadata() to parse the :Original: metadata
from translation files and extract the actual origin path. Update
check_per_file() to use metadata-based lookup first, falling back to
the path manipulation heuristic only when no metadata is found.

Signed-off-by: Haoyang LIU <tttturtleruss@xxxxxxxxx>
---
tools/docs/checktransupdate.py | 63 ++++++++++++++++++++++++++++++++--
1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/tools/docs/checktransupdate.py b/tools/docs/checktransupdate.py
index cc07cda667fc..b3c695fa0f7a 100755
--- a/tools/docs/checktransupdate.py
+++ b/tools/docs/checktransupdate.py
@@ -32,7 +32,7 @@ from datetime import datetime


def get_origin_path(file_path):
- """Get the origin path from the translation path"""
+ """Get the origin path from the translation path by path manipulation (fallback)"""
paths = file_path.split("/")
tidx = paths.index("translations")
opaths = paths[:tidx]
@@ -40,6 +40,62 @@ def get_origin_path(file_path):
return "/".join(opaths)


+def get_origin_path_from_metadata(file_path):
+    """Get the origin path from the :Original: metadata in the translation file.
+
+    Only the first 20 lines are searched. Recognized forms of the field:
+    1. Plain path: :Original: Documentation/path/to/file.rst
+    2. :ref: role: :Original: :ref:`Documentation/path/to/file.rst <label>`
+    3. :doc: role: :Original: :doc:`../../../path/to/file`, optionally
+       written as :doc:`title <path>`; resolved against the file's directory
+
+    Returns the origin path if found, None otherwise (including when the
+    file cannot be read or decoded), so that callers can fall back to
+    another lookup.
+    """
+    # Pattern to match :Original: line
+    original_re = re.compile(r'^:Original:\s*(.+?)\s*$', re.IGNORECASE)
+    # Pattern to extract path from :ref:`path <label>` or :ref:`path`
+    ref_re = re.compile(r':ref:`([^`<]+?)(?:\s*<[^>]+>)?`')
+    # Pattern to extract path from :doc:`title <path>` or :doc:`path`
+    doc_re = re.compile(r':doc:`(?:[^`<]*<([^`>]+)>|([^`]+))`')
+
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            # Only check the first 20 lines for metadata
+            for _, line in zip(range(20), f):
+                match = original_re.match(line.strip())
+                if not match:
+                    continue
+                original_value = match.group(1)
+                ref_match = ref_re.search(original_value)
+                ref_path = ref_match.group(1).strip() if ref_match else ""
+                # A bare :ref:`label` names no file; only accept real paths
+                if ref_path.startswith('Documentation/'):
+                    return ref_path
+
+                doc_match = doc_re.search(original_value)
+                if doc_match:
+                    doc_path = (doc_match.group(1) or doc_match.group(2)).strip()
+                    # Sphinx resolves relative docnames against this file's directory
+                    if not doc_path.startswith('/'):
+                        trans_dir = os.path.dirname(file_path)
+                        resolved = os.path.normpath(os.path.join(trans_dir, doc_path))
+                        # Add .rst extension if not present
+                        if not resolved.endswith('.rst'):
+                            resolved += '.rst'
+                        return resolved
+
+                # Plain path (no role wrapper)
+                if original_value.startswith('Documentation/'):
+                    return original_value
+
+    except (OSError, UnicodeDecodeError) as e:
+        logging.debug("Could not read file %s: %s", file_path, e)
+
+    return None
+
+
def get_latest_commit_from(file_path, commit):
"""Get the latest commit from the specified commit for the specified file"""
command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}"
@@ -128,7 +184,10 @@ def valid_commit(commit):

def check_per_file(file_path):
"""Check the translation status for the specified file"""
- opath = get_origin_path(file_path)
+ opath = get_origin_path_from_metadata(file_path)
+ if opath is None:
+ opath = get_origin_path(file_path)
+ logging.debug("No :Original: metadata found, using path-based fallback for %s", file_path)

if not os.path.isfile(opath):
logging.error("Cannot find the origin path for %s", file_path)
--
2.53.0