build: check docs for edits of autogenerated sections

This adds a lint step which checks the top commit for edits to
autogenerated doc sections.
This commit is contained in:
Nick Craig-Wood 2025-03-10 10:31:50 +00:00
parent 6e7a3795f1
commit 6a7e68aaf2
2 changed files with 139 additions and 0 deletions

View File

@ -226,6 +226,8 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install Go
id: setup-go
@ -289,6 +291,10 @@ jobs:
- name: Scan for vulnerabilities
run: govulncheck ./...
- name: Scan edits of autogenerated files
run: bin/check_autogenerated_edits.py
if: github.event_name == 'pull_request'
android:
if: inputs.manual || (github.repository == 'rclone/rclone' && (github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name))
timeout-minutes: 30

133
bin/check_autogenerated_edits.py Executable file
View File

@ -0,0 +1,133 @@
#!/usr/bin/env python3
"""
This script checks for unauthorized modifications in autogenerated sections of markdown files.
It is designed to be used in a GitHub Actions workflow or a local pre-commit hook.
Features:
- Detects markdown files changed in the last commit.
- Identifies modified autogenerated sections marked by specific comments.
- Reports violations using GitHub Actions error messages.
- Exits with a nonzero status code if unauthorized changes are found.
It currently only checks the last commit.
"""
import re
import subprocess
import sys
def run_git(args):
    """
    Invoke ``git`` with the given argument list and return what it printed.

    Standard output is captured and decoded as text, and surrounding
    whitespace (including the trailing newline) is removed before it is
    returned. Because the command runs with ``check=True``, a nonzero exit
    status raises ``subprocess.CalledProcessError``.
    """
    command = ["git"] + args
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout.strip()
def get_changed_files():
    """
    Retrieve a list of markdown files that were changed in the last commit.

    The names come from ``git diff --name-only`` between HEAD~1 and HEAD.
    The ``-z`` flag makes git emit each path verbatim, terminated by a NUL
    byte. Without it git wraps paths containing "unusual" characters in
    double quotes with C-style escapes, so such a name would end in ``.md"``
    and be silently skipped by the suffix filter below.

    Returns a list of path strings ending in ``.md`` (possibly empty).
    """
    output = run_git(["diff", "--name-only", "-z", "HEAD~1", "HEAD"])
    # Splitting on NUL leaves an empty trailing element (and a single empty
    # element when nothing changed); the suffix filter discards those too.
    return [name for name in output.split("\0") if name.endswith(".md")]
def get_diff(file):
    """
    Return the diff of one file between HEAD~1 and HEAD as a list of lines.

    The diff is produced with zero lines of context (``-U0``), so every hunk
    consists only of the removed and added lines plus its ``@@`` header.
    """
    git_args = ["diff", "-U0", "HEAD~1", "HEAD", "--", file]
    patch_text = run_git(git_args)
    return patch_text.splitlines()
def get_file_content(ref, file):
    """
    Retrieve the content of a file from a given Git reference.

    Returns the file as a list of lines whose 1-based positions match the
    line numbers git uses in diff hunk headers. To keep that guarantee:

    - the output is NOT whitespace-stripped, so leading blank lines in the
      file keep their place and do not shift everything after them;
    - the output is read as bytes and split on "\\n" only, because text mode
      and ``str.splitlines()`` also break on characters (lone CR, form feed,
      U+2028, ...) that git does not count as line terminators.

    If the file cannot be read at that reference (for example it was added
    or deleted by the commit being checked), or git cannot be started, an
    empty list is returned. Git's error message for that expected case is
    suppressed so it does not clutter the log.
    """
    try:
        raw = subprocess.run(
            ["git", "show", f"{ref}:{file}"],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=True,
        ).stdout
    except (subprocess.CalledProcessError, OSError):
        return []
    # Undecodable bytes are replaced rather than raised on, so an oddly
    # encoded file is still scanned for region markers.
    lines = raw.decode("utf-8", errors="replace").split("\n")
    # A file ending in a newline yields one empty trailing element that is
    # not a real line.
    if lines and lines[-1] == "":
        lines.pop()
    return lines
def find_regions(lines):
    """
    Locate the autogenerated regions in a file given as a list of lines.

    A region opens on a line containing "rem autogenerated options start"
    and closes on the next line containing "rem autogenerated options stop".
    The result is a list of (start, stop) tuples of 1-based line numbers,
    with both marker lines included. A start marker seen while a region is
    already open moves the opening line forward; a stop marker with no open
    region, or a region never closed, produces nothing.
    """
    spans = []
    opened_at = None
    for number, text in enumerate(lines, start=1):
        if "rem autogenerated options start" in text:
            opened_at = number
            continue
        if opened_at is None or "rem autogenerated options stop" not in text:
            continue
        spans.append((opened_at, number))
        opened_at = None
    return spans
def in_region(ln, regions):
    """
    Report whether line number ``ln`` lies inside any of ``regions``.

    ``regions`` is an iterable of (start, end) pairs; both ends are
    inclusive.
    """
    for first, last in regions:
        if first <= ln <= last:
            return True
    return False
def show_error(file_name, line, message):
    """
    Emit one error annotation on standard output.

    The line uses the ``::error file=...,line=...::`` workflow-command
    syntax, followed by the message and a plain-text repeat of the location
    so the output is also readable outside GitHub Actions.
    """
    annotation = f"::error file={file_name},line={line}::{message} at {file_name} line {line}"
    print(annotation)
def check_file(file):
    """
    Check a markdown file for modifications in autogenerated regions.

    Two kinds of protection are recognised:

    - Whole file: if one of the first 10 lines of the HEAD version contains
      "autogenerated - DO NOT EDIT", any diff at all is a violation,
      reported against line 1.
    - Regions: added lines are checked against the regions of the HEAD
      version and removed lines against the regions of the HEAD~1 version,
      using the line numbers taken from the diff hunk headers.

    Each violation is reported through show_error().

    Returns True if at least one violation was reported, otherwise False.
    """
    new_lines = get_file_content("HEAD", file)
    old_lines = get_file_content("HEAD~1", file)

    # Entire autogenerated file check.
    # NOTE(review): only the HEAD version is inspected, so a commit that
    # deletes the header line (or the whole file) is not caught here.
    if any("autogenerated - DO NOT EDIT" in text for text in new_lines[:10]):
        if get_diff(file):
            show_error(file, 1, "Autogenerated file modified")
            return True
        return False

    # Partial autogenerated regions.
    regions_new = find_regions(new_lines)
    regions_old = find_regions(old_lines)
    if not regions_new and not regions_old:
        # Nothing is protected in either version, so no line can violate.
        return False

    hunk_re = re.compile(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@")
    viol = False
    # Both counters stay None until the first hunk header has been parsed.
    new_ln = old_ln = None
    for line in get_diff(file):
        if line.startswith("@@"):
            m = hunk_re.match(line)
            if m:
                old_ln = int(m.group(1))
                new_ln = int(m.group(3))
        elif new_ln is None:
            # Still in the file header ("diff --git", "---", "+++", ...).
            continue
        elif line.startswith("\\"):
            # "\ No newline at end of file" is a marker, not a line of either
            # version. Counting it would push the numbers of the "+" lines
            # that follow it in the same hunk off by one.
            continue
        elif line.startswith("+"):
            if in_region(new_ln, regions_new):
                show_error(file, new_ln, "Autogenerated region of file modified")
                viol = True
            new_ln += 1
        elif line.startswith("-"):
            if in_region(old_ln, regions_old):
                show_error(file, old_ln, "Autogenerated region of file modified")
                viol = True
            old_ln += 1
        else:
            # Context line: present in both versions.
            new_ln += 1
            old_ln += 1
    return viol
def main():
    """
    Check every changed markdown file and exit with the overall verdict.

    All files are checked even after the first violation so that every
    problem is reported in a single run. The process exits with status 1 if
    any file had a violation; otherwise it prints a confirmation and exits
    with status 0.
    """
    # A list (not a generator) so check_file() runs for every file before
    # the results are combined.
    results = [check_file(path) for path in get_changed_files()]
    if any(results):
        sys.exit(1)
    print("No unauthorized edits found in autogenerated sections.")
    sys.exit(0)


# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()