From eafb39e64842f56309025a0593f3a40a50bc04ea Mon Sep 17 00:00:00 2001
From: tdro
Date: Sat, 19 Mar 2022 07:17:26 -0400
Subject: .local/bin/broken-links: Make useful

---
 .local/bin/broken-links | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/.local/bin/broken-links b/.local/bin/broken-links
index ff5c7f8..20da6ef 100755
--- a/.local/bin/broken-links
+++ b/.local/bin/broken-links
@@ -1,3 +1,38 @@
 #!/bin/sh -eu
-wget --spider --recursive --level 3 --no-verbose \
-  --no-directories --delete-after "$1" 2>&1 | grep --invert-match --extended-regexp 'URL:|unlink:'
+
+# Usage: broken-links [URL]
+# Lists the links lynx finds on URL (default https://example.com), sends a
+# HEAD request to each one and prints a URL/STATUS/MESSAGE table. Every
+# result is also written to stderr as it is produced.
+
+lynx -nocolor -dump -listonly "${1:-https://example.com}" |
+  grep --color="never" "\." |
+  while read -r line; do
+    # Drop everything up to the first ". " (the list number before the link).
+    url=${line#*. }
+    # grep exits non-zero when wget printed no line containing "HTTP"
+    # (timeout, DNS failure, unsupported scheme, ...). Without "|| true"
+    # that status trips "set -e" and silently ends the scan at that link.
+    head=$(wget \
+      --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36" \
+      --timeout=1 \
+      --wait=1 \
+      --waitretry=1 \
+      --random-wait \
+      --no-cache \
+      --no-dns-cache \
+      --method=HEAD "$url" 2>&1 |
+      grep --color="never" -E 'HTTP' || true)
+    # Keep only what follows the first "... " on the first matching line.
+    response=$(printf '%s' "${head#*... }" | head --lines=1)
+    # Placeholder so links that never answered still get a table row.
+    response=${response:-ERR no-response}
+    printf '%s %s\n' "$url" "$response"
+    printf '%s %s\n' "$url" "$response" >&2
+  done |
+  awk '{ print $1, $2, $3}' |
+  column \
+    --output-width 80 \
+    --table \
+    --table-columns URL,STATUS,MESSAGE \
+    --table-truncate URL |
+  sort --reverse --key=2
-- 
cgit v1.2.3