Merge pull request #142 from phanirithvij/linkcheck-minor-fix

linkcheck: download xmldump conservatively
This commit is contained in:
Jörg Thalheim 2024-09-18 10:55:03 +02:00 committed by GitHub
commit 4c8b167033
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 18 additions and 6 deletions

View File

@@ -9,19 +9,30 @@
# %7B is { matches urls inside nix expressions like ${version} etc. # %7B is { matches urls inside nix expressions like ${version} etc.
(.*\.(domain|tld|mydomain|local).*|my.app|%7B) (.*\.(domain|tld|mydomain|local).*|my.app|%7B)
# xml namespaces need not be retrievable urls https://stackoverflow.com/a/27614076
(qemu/1.0|locating-rules/1.0) (qemu/1.0|locating-rules/1.0)
# no need to fetch cache entries # no need to fetch cache entries
(cache.nixos.org|fzakaria.cachix.org) (cache.nixos.org|fzakaria.cachix.org)
# urls in example snippets # urls in example snippets (url status can be 404)
(USERNAME/nixpkgs|your_username/nixpkgs|fooUser/barRepo|code.visualstudio.com/sha|path/to/patch) (USERNAME/nixpkgs|your_username/nixpkgs|fooUser/barRepo|code.visualstudio.com/sha|path/to/patch)
https://github.com/Artturin/nixpkgs/archive/add-swap-options.tar.gz
# works in browser
https://www.phoronix.com/news/Mesa-Delete-Clover-Discussion
# works with git clone # works with git clone
https://review.coreboot.org/coreboot.git https://review.coreboot.org/coreboot.git
# works as intended
https://one.one.one.one/dns-query
# works in browser # works in browser
https://www.phoronix.com/news/Mesa-Delete-Clover-Discussion
https://pypi.org/project/stt/#files https://pypi.org/project/stt/#files
https://static-web-server.net/$
https://static-web-server.net/configuration/config-file/
https://static-web-server.net/features/security-headers/
# reddit working posts
reddit.com/r/XMG_gg/comments/ic7vt7/fusion15_linux_how_to_fix_thunderbolttb3_dock_usb
reddit.com/r/NixOS/comments/31lx3i/windows_and_nixos_dual_boot
reddit.com/r/NixOS/comments/nuclde/how_to_properly_set_up_lidclose_behaviour_on_a

View File

@@ -8,7 +8,8 @@ workdir="$SCRIPT_DIR/workdir"
mkdir -p "$workdir" mkdir -p "$workdir"
pushd "$workdir" || exit pushd "$workdir" || exit
curl "https://wiki.nixos.org/wikidump.xml.zst" | zstd -d >wikidump.xml curl -o wikidump.xml.zst "https://wiki.nixos.org/wikidump.xml.zst" -z wikidump.xml.zst
<wikidump.xml.zst zstd -d >wikidump.xml
# filter unimportant pages like User:* Talk:* # filter unimportant pages like User:* Talk:*
python3 ../main.py filter wikidump.xml wikidump-filtered.xml python3 ../main.py filter wikidump.xml wikidump-filtered.xml
@@ -17,7 +18,7 @@ python3 ../main.py filter wikidump.xml wikidump-filtered.xml
python3 ../main.py badlinks ../allowed.links exclude-args python3 ../main.py badlinks ../allowed.links exclude-args
extrargs=( extrargs=(
# exlude sending requests to the wiki # exclude sending requests to the wiki
"--exclude" "wiki.nixos.org/wiki" "--exclude" "wiki.nixos.org/wiki"
# default is too high # default is too high
"--max-concurrency" "16" "--max-concurrency" "16"