#!/usr/bin/env python3
#
# Provenance
# ----------
# Extracted from rclone commit 6a7e68aaf2d56df069eb31a46f2f6015a9429b24
#   From:    Nick Craig-Wood
#   Date:    Mon, 10 Mar 2025 10:31:50 +0000
#   Subject: [PATCH] build: check docs for edits of autogenerated sections
#
#   This adds a lint step which checks the top commit for edits to
#   autogenerated doc sections.
#
# The commit adds this file as bin/check_autogenerated_edits.py (mode 100755)
# and wires it into .github/workflows/build.yml as follows:
#
#   * the "Checkout" step (actions/checkout@v4) gains
#         with:
#           fetch-depth: 0
#     so that HEAD~1 is available to this script;
#   * after the "Scan for vulnerabilities" step (govulncheck ./...) a new step
#     is added:
#         - name: Scan edits of autogenerated files
#           run: bin/check_autogenerated_edits.py
#           if: github.event_name == 'pull_request'
"""
Check for unauthorized modifications in autogenerated sections of markdown files.

It is designed to be used in a GitHub Actions workflow or a local pre-commit hook.

Features:
- Detects markdown files changed in the last commit.
- Identifies modified autogenerated sections marked by specific comments.
- Reports violations using GitHub Actions error messages.
- Exits with a nonzero status code if unauthorized changes are found.

It currently only checks the last commit (HEAD~1..HEAD).
"""

import re
import subprocess
import sys
from typing import List, Optional, Tuple

# Marker lines delimiting a partially autogenerated region (both inclusive).
REGION_START_MARKER = "rem autogenerated options start"
REGION_STOP_MARKER = "rem autogenerated options stop"

# Marker identifying a file that is autogenerated in its entirety; it is only
# looked for in the first FILE_MARKER_SCAN_LINES lines of the file.
FILE_MARKER = "autogenerated - DO NOT EDIT"
FILE_MARKER_SCAN_LINES = 10

# Unified diff hunk header, e.g. "@@ -12,3 +14 @@". Groups 1 and 3 are the
# first affected line on the old and new side respectively.
HUNK_RE = re.compile(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@")

Region = Tuple[int, int]


def run_git(args, *, strip=True):
    """
    Run a Git command with the provided arguments and return its output as a string.

    Args:
        args: list of arguments placed after ``git`` on the command line.
        strip: when true (the default, matching the historical behaviour)
            leading and trailing whitespace is removed from the output. Pass
            ``False`` when the output is file or diff content whose line
            numbers matter: stripping would drop leading blank lines and
            shift every subsequent line number.

    Raises:
        subprocess.CalledProcessError: if git exits with a nonzero status.
    """
    output = subprocess.run(
        ["git"] + args, stdout=subprocess.PIPE, text=True, check=True
    ).stdout
    return output.strip() if strip else output


def split_lines(text: str) -> List[str]:
    """
    Split git output into lines the same way git counts them.

    Only ``"\\n"`` is a line terminator for git, whereas ``str.splitlines()``
    also breaks on form feeds, lone carriage returns, U+2028, etc., which would
    make computed line numbers disagree with the numbers in diff hunk headers.
    A final newline does not produce a trailing empty line.
    """
    lines = text.split("\n")
    if lines and lines[-1] == "":
        lines.pop()
    return lines


def get_changed_files():
    """
    Retrieve a list of markdown files that were changed in the last commit.

    ``-z`` makes git emit NUL-separated, unquoted paths, so names containing
    non-ASCII or other special characters are not C-quoted (which would leave
    them wrapped in double quotes and make them fail the ``.md`` test).
    """
    output = run_git(["diff", "--name-only", "-z", "HEAD~1", "HEAD"], strip=False)
    # The output ends with a NUL, so the last piece is empty; it is dropped by
    # the suffix test along with every non-markdown path.
    return [name for name in output.split("\0") if name.endswith(".md")]


def get_diff(file):
    """
    Get the diff of a given file between the previous commit and the last commit.

    The diff is produced with zero lines of context (``-U0``) and returned as
    a list of lines; the list is empty when there is no difference.
    """
    return split_lines(
        run_git(["diff", "-U0", "HEAD~1", "HEAD", "--", file], strip=False)
    )


def get_file_content(ref, file):
    """
    Retrieve the content of a file from a given Git reference as a list of lines.

    Returns an empty list if git cannot show the file at that reference (for
    example because it does not exist there) or if its content cannot be
    decoded as text.
    """
    try:
        return split_lines(run_git(["show", f"{ref}:{file}"], strip=False))
    except (subprocess.CalledProcessError, UnicodeDecodeError):
        return []


def find_regions(lines) -> List[Region]:
    """
    Identify the start and end line numbers of autogenerated regions in a file.

    Returns a list of ``(start, stop)`` tuples of 1-based line numbers, where
    both marker lines are part of the region. A start marker without a
    following stop marker does not produce a region; if several start markers
    precede a stop marker, the last one wins.
    """
    regions = []
    start = None
    for number, line in enumerate(lines, 1):
        if REGION_START_MARKER in line:
            start = number
        elif REGION_STOP_MARKER in line and start is not None:
            regions.append((start, number))
            start = None
    return regions


def in_region(ln, regions):
    """
    Check if a given line number falls within an autogenerated region.

    Region bounds are inclusive.
    """
    return any(start <= ln <= end for start, end in regions)


def show_error(file_name, line, message):
    """
    Print an error message in a GitHub Actions-compatible format.
    """
    print(f"::error file={file_name},line={line}::{message} at {file_name} line {line}")


def find_violations(diff, regions_old, regions_new) -> List[int]:
    """
    Scan a ``-U0`` unified diff for changes that touch autogenerated regions.

    Args:
        diff: the diff as a list of lines (as returned by ``get_diff``).
        regions_old: regions of the old revision; removed lines are checked
            against these using old-side line numbers.
        regions_new: regions of the new revision; added lines are checked
            against these using new-side line numbers.

    Returns:
        The offending line numbers in the order they appear in the diff
        (old-side numbers for removed lines, new-side numbers for added lines).
    """
    hits = []
    old_ln: Optional[int] = None
    new_ln: Optional[int] = None

    for line in diff:
        if line.startswith("@@"):
            match = HUNK_RE.match(line)
            if match:
                old_ln = int(match.group(1))
                new_ln = int(match.group(3))
        elif new_ln is None:
            # Still in the file header ("diff --git", "---", "+++", ...).
            continue
        elif line.startswith("\\"):
            # "\ No newline at end of file" annotates the previous line; it is
            # not a line of either file and must not advance the counters,
            # otherwise the "+" lines following it are checked one line off.
            continue
        elif line.startswith("+"):
            if in_region(new_ln, regions_new):
                hits.append(new_ln)
            new_ln += 1
        elif line.startswith("-"):
            if in_region(old_ln, regions_old):
                hits.append(old_ln)
            old_ln += 1
        else:
            # Context line (not expected with -U0, handled for completeness).
            new_ln += 1
            old_ln += 1

    return hits


def check_file(file):
    """
    Check a markdown file for modifications in autogenerated regions.

    Prints one GitHub Actions error per violation and returns True if any
    violation was found, False otherwise.
    """
    new_lines = get_file_content("HEAD", file)

    # Entire autogenerated file check.
    if any(FILE_MARKER in line for line in new_lines[:FILE_MARKER_SCAN_LINES]):
        if get_diff(file):
            show_error(file, 1, "Autogenerated file modified")
            return True
        return False

    # Partial autogenerated regions.
    old_lines = get_file_content("HEAD~1", file)
    hits = find_violations(
        get_diff(file), find_regions(old_lines), find_regions(new_lines)
    )
    for ln in hits:
        show_error(file, ln, "Autogenerated region of file modified")
    return bool(hits)


def main():
    """
    Main function that iterates over changed files and checks them for violations.

    Every changed file is checked (no short-circuit) so that all violations
    are reported in one run. Exits with status 1 if any were found, else 0.
    """
    found = False
    for f in get_changed_files():
        if check_file(f):
            found = True
    if found:
        sys.exit(1)
    print("No unauthorized edits found in autogenerated sections.")
    sys.exit(0)


if __name__ == "__main__":
    main()