mirror of
https://github.com/Mic92/nixos-wiki-infra.git
synced 2024-11-22 08:14:06 +01:00
linkcheck: use array to pass args
This commit is contained in:
parent
7c9a68ff76
commit
91b55c6942
@ -16,19 +16,19 @@ python3 ../main.py filter wikidump.xml wikidump-filtered.xml
|
|||||||
# generate exclude args from allowlist
|
# generate exclude args from allowlist
|
||||||
python3 ../main.py badlinks ../allowed.links exclude-args
|
python3 ../main.py badlinks ../allowed.links exclude-args
|
||||||
|
|
||||||
# exlude sending requests to the wiki
|
extrargs=(
|
||||||
echo "--exclude wiki.nixos.org/wiki" >>exclude-args
|
# exlude sending requests to the wiki
|
||||||
extrargs="$extrargs --exclude wiki.nixos.org/wiki"
|
"--exclude" "wiki.nixos.org/wiki"
|
||||||
excludeargs=$(cat exclude-args)
|
# default is too high
|
||||||
|
"--max-concurrency" "16"
|
||||||
|
)
|
||||||
|
read -r -a excludeargs <<<"$(<exclude-args)"
|
||||||
|
|
||||||
# extract only the text from the filtered xml dump
|
# extract only the text from the filtered xml dump
|
||||||
nix --extra-experimental-features "nix-command flakes" run ..#wikiextractor wikidump-filtered.xml
|
nix --extra-experimental-features "nix-command flakes" run ..#wikiextractor wikidump-filtered.xml
|
||||||
|
|
||||||
# lychee requires .md or .html format files to parse
|
# lychee requires .md or .html format files to parse
|
||||||
find text -type f | grep -v .html | xargs -I{} mv {} "{}.html"
|
find text -type f ! -name "*.html" -print0 | xargs -0 -I{} mv {} "{}.html"
|
||||||
|
|
||||||
# default is too high
|
|
||||||
extrargs="$extrargs --max-concurrency 16"
|
|
||||||
|
|
||||||
# github_token from env or fallback to gh (local dev)
|
# github_token from env or fallback to gh (local dev)
|
||||||
if [ -z "${GITHUB_TOKEN}" ]; then
|
if [ -z "${GITHUB_TOKEN}" ]; then
|
||||||
@ -40,40 +40,36 @@ fi
|
|||||||
|
|
||||||
if [ -n "${GITHUB_TOKEN}" ]; then
|
if [ -n "${GITHUB_TOKEN}" ]; then
|
||||||
echo using github token
|
echo using github token
|
||||||
extrargs="$extrargs --github-token $GITHUB_TOKEN"
|
extrargs+=("--github-token" "$GITHUB_TOKEN")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# shellcheck disable=SC2086
|
|
||||||
# fetch links
|
# fetch links
|
||||||
lychee -E \
|
lychee -E \
|
||||||
--cache --scheme http --scheme https \
|
--cache --scheme http --scheme https \
|
||||||
--include-verbatim $excludeargs $extrargs \
|
--include-verbatim "${excludeargs[@]}" "${extrargs[@]}" \
|
||||||
text |
|
text |
|
||||||
tee lychee.log
|
tee lychee.log
|
||||||
|
|
||||||
# shellcheck disable=SC2086
|
|
||||||
# get all links ignoring the allowlist (allowed.links)
|
# get all links ignoring the allowlist (allowed.links)
|
||||||
lychee -E \
|
lychee -E \
|
||||||
--cache --scheme http --scheme https \
|
--cache --scheme http --scheme https \
|
||||||
--include-verbatim $extrargs \
|
--include-verbatim "${extrargs[@]}" \
|
||||||
text |
|
text |
|
||||||
tee lychee-full.log
|
tee lychee-full.log
|
||||||
|
|
||||||
# shellcheck disable=SC2086
|
|
||||||
# save fail_map so we can construct wiki link map to failed urls
|
# save fail_map so we can construct wiki link map to failed urls
|
||||||
lychee -E \
|
lychee -E \
|
||||||
--cache --scheme http --scheme https \
|
--cache --scheme http --scheme https \
|
||||||
--include-verbatim $excludeargs $extrargs \
|
--include-verbatim "${excludeargs[@]}" "${extrargs[@]}" \
|
||||||
--format json \
|
--format json \
|
||||||
text >lychee.json
|
text >lychee.json
|
||||||
|
|
||||||
# get archive suggestions
|
# get archive suggestions
|
||||||
# --timeout not working with --suggest see https://github.com/lycheeverse/lychee/issues/1501
|
# --timeout not working with --suggest see https://github.com/lycheeverse/lychee/issues/1501
|
||||||
# TODO remove timeout command later after the issue is fixed
|
# TODO remove timeout command later after the issue is fixed
|
||||||
# shellcheck disable=SC2086
|
|
||||||
timeout 30 lychee -E \
|
timeout 30 lychee -E \
|
||||||
--cache --scheme http --scheme https \
|
--cache --scheme http --scheme https \
|
||||||
--include-verbatim $excludeargs $extrargs \
|
--include-verbatim "${excludeargs[@]}" "${extrargs[@]}" \
|
||||||
--suggest \
|
--suggest \
|
||||||
text |
|
text |
|
||||||
tee lychee-wayback.log
|
tee lychee-wayback.log
|
||||||
|
Loading…
Reference in New Issue
Block a user