#!/usr/bin/env python3
"""
Check for unauthorized modifications in autogenerated sections of markdown files.

It is designed to be used in a GitHub Actions workflow or a local pre-commit
hook.

Features:
- Detects markdown files changed between a commit and one of its ancestors.
  Default is to check the last commit only. When triggered on a pull request
  it should typically compare the pull request branch head and its merge
  base - the commit on the main branch before it diverged.
- Identifies modified autogenerated sections marked by specific comments.
- Reports violations using GitHub Actions error messages.
- Exits with a nonzero status code if unauthorized changes are found.

Note: diffs are taken with ``git diff base...head`` (three dots), so the
"old" side of every diff is the merge base of ``base`` and ``head``.
"""

import re
import subprocess
import sys

# Markers delimiting a partially autogenerated region inside a file.
REGION_START_MARKER = "rem autogenerated options start"
REGION_STOP_MARKER = "rem autogenerated options stop"
# Marker (searched for in the first lines) flagging a fully autogenerated file.
FILE_MARKER = "autogenerated - DO NOT EDIT"
# Number of leading lines inspected for FILE_MARKER.
FILE_MARKER_SCAN_LINES = 10
# Unified-diff hunk header: "@@ -old_start[,old_count] +new_start[,new_count] @@".
HUNK_RE = re.compile(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@")


def run_git(args, strip=True):
    """
    Run a Git command with the provided arguments and return its output.

    Args:
        args: list of arguments passed after ``git``.
        strip: when True (the default) surrounding whitespace is removed from
            the output. Pass False when the output is file content whose
            leading blank lines must be preserved.

    Returns:
        The command's standard output as a string.

    Raises:
        subprocess.CalledProcessError: if Git exits with a nonzero status.
    """
    output = subprocess.run(
        ["git"] + args, stdout=subprocess.PIPE, text=True, check=True
    ).stdout
    return output.strip() if strip else output


def get_changed_files(base, head):
    """
    Retrieve the markdown files that were changed between base and head.

    Returns:
        A list of paths ending in ``.md`` reported by
        ``git diff --name-only base...head``.
    """
    files = run_git(["diff", "--name-only", f"{base}...{head}"]).splitlines()
    return [f for f in files if f.endswith(".md")]


def get_diff(file, base, head):
    """
    Get the zero-context diff of a file between the base and head commits.

    Returns:
        The lines of ``git diff -U0 base...head -- file``; empty if the file
        did not change.
    """
    return run_git(["diff", "-U0", f"{base}...{head}", "--", file]).splitlines()


def get_file_content(ref, file):
    """
    Retrieve the content of a file from a given Git reference.

    The output is deliberately not stripped: removing leading blank lines
    would shift every line number relative to the diff.

    Returns:
        The file's lines, or an empty list if the file cannot be read at
        that reference (e.g. it does not exist there or is not valid text).
    """
    try:
        return run_git(["show", f"{ref}:{file}"], strip=False).splitlines()
    except (subprocess.CalledProcessError, UnicodeDecodeError):
        return []


def get_old_ref(base, head):
    """
    Return the commit that forms the old side of ``git diff base...head``.

    A three-dot diff compares the merge base of the two commits with head,
    so old-side line numbers refer to the merge base rather than to base.
    Falls back to base itself if the merge base cannot be determined.
    """
    try:
        return run_git(["merge-base", base, head])
    except subprocess.CalledProcessError:
        return base


def find_regions(lines):
    """
    Identify the start and end line numbers of autogenerated regions.

    Args:
        lines: the file content as a list of lines.

    Returns:
        A list of ``(start, stop)`` 1-based line numbers, both inclusive,
        one per start/stop marker pair. A stop marker without a preceding
        start marker, and a start marker that is never closed, are ignored.
    """
    regions = []
    start = None
    for number, line in enumerate(lines, 1):
        if REGION_START_MARKER in line:
            start = number
        elif REGION_STOP_MARKER in line and start is not None:
            regions.append((start, number))
            start = None
    return regions


def in_region(ln, regions):
    """
    Check if a given line number falls within an autogenerated region.
    """
    return any(start <= ln <= end for start, end in regions)


def show_error(file_name, line, message):
    """
    Print an error message in a GitHub Actions-compatible format.
    """
    print(f"::error file={file_name},line={line}::{message} at {file_name} line {line}")


def find_violations(diff, regions_old, regions_new):
    """
    Walk a unified diff and collect the lines that touch protected regions.

    Args:
        diff: lines of a unified diff for a single file.
        regions_old: protected regions in the old version of the file.
        regions_new: protected regions in the new version of the file.

    Returns:
        A list of line numbers, in diff order. Added lines are reported with
        their new-file line number, removed lines with their old-file one.
    """
    violations = []
    new_ln = old_ln = None
    for line in diff:
        if line.startswith("@@"):
            match = HUNK_RE.match(line)
            if match:
                old_ln = int(match.group(1))
                new_ln = int(match.group(3))
        elif new_ln is None:
            # Still in the file header ("diff --git", "---", "+++").
            continue
        elif line.startswith("\\"):
            # "\ No newline at end of file" is metadata, not a file line;
            # counting it would misplace the lines that follow it.
            continue
        elif line.startswith("+"):
            if in_region(new_ln, regions_new):
                violations.append(new_ln)
            new_ln += 1
        elif line.startswith("-"):
            if in_region(old_ln, regions_old):
                violations.append(old_ln)
            old_ln += 1
        else:
            # Context line: present in both versions.
            new_ln += 1
            old_ln += 1
    return violations


def check_file(file, base, head):
    """
    Check a markdown file for modifications in autogenerated regions.

    Args:
        file: path of the file, relative to the repository root.
        base: base commit of the comparison.
        head: head commit of the comparison.

    Returns:
        True if a violation was found (and reported), False otherwise.
    """
    # Read the same two revisions that the three-dot diff compares, so the
    # region line numbers line up with the hunk line numbers.
    new_lines = get_file_content(head, file)
    old_lines = get_file_content(get_old_ref(base, head), file)

    # If old file did not exist or was empty then don't check.
    if not old_lines:
        return False

    diff = get_diff(file, base, head)

    # Entire autogenerated file check.
    if any(FILE_MARKER in line for line in new_lines[:FILE_MARKER_SCAN_LINES]):
        if diff:
            show_error(file, 1, "Autogenerated file modified")
            return True
        return False

    # Partial autogenerated regions.
    violations = find_violations(
        diff, find_regions(old_lines), find_regions(new_lines)
    )
    for line_number in violations:
        show_error(file, line_number, "Autogenerated region of file modified")
    return bool(violations)


def main():
    """
    Iterate over changed markdown files and check them for violations.

    Command line: ``[base [head]]``; defaults to ``HEAD~1`` and ``HEAD``.
    Exits with status 1 if any violation is found, 0 otherwise.
    """
    base = "HEAD~1"
    head = "HEAD"
    if len(sys.argv) > 1:
        base = sys.argv[1]
    if len(sys.argv) > 2:
        head = sys.argv[2]

    found = False
    for changed in get_changed_files(base, head):
        # Check every file (no short-circuit) so all violations get reported.
        if check_file(changed, base, head):
            found = True

    if found:
        sys.exit(1)

    print("No unauthorized edits found in autogenerated sections.")
    sys.exit(0)


if __name__ == "__main__":
    main()