#!/usr/bin/env bash

# Download a list of mirrors in HTML format, extract the URLs of FTP and
# HTTP(S) mirrors, and print them to standard output, terminated by
# a single slash and a newline.

# -----

# Print "<$0>: <arguments>" to standard error.  The body is a subshell so
# that unsetting IFS (which makes "$*" join the arguments with single
# spaces) cannot leak into the caller.
msg() (unset IFS; printf '%s: %s\n' "$0" "$*" >&2)

# Convenience wrappers around msg for errors and warnings.
err() { msg error: "$@"; }
warn() { msg warning: "$@"; }

# Print the usage message to standard error.  Expands $script at call
# time, so it must only be called after $script has been assigned below.
usage() {
  cat >&2 <<EOF
Usage: $script [-a] [-K <config>] [--] <source>

Available sources:
    ctan / tex / tex_ctan
    gentoo
    gnu
    xorg

See the mirror-utils README for more details.
EOF
}

# Avoid $0 (http://mywiki.wooledge.org/BashFAQ/028).
# "--" keeps a path that starts with "-" from being parsed as an option.
script=$(basename -- "${BASH_SOURCE[0]}")
parent=$(dirname -- "${BASH_SOURCE[0]}")

# The XSLT stylesheet is referenced relative to the script's directory.
cd -- "$parent" || {
  err "cannot cd(1) into '$parent'"
  exit 1
}

# Parse options. Remember to update the mirror-utils README, usage
# message, and argument validation after changing these.
#
# Both variables start out unset (not merely empty): later code tells
# "-K was not given" apart from "-K was given an empty value".
unset all_urls curl_config
while getopts aK: opt; do
  case $opt in
    a) all_urls=1 ;;
    K) curl_config=$OPTARG ;;
    '?') usage; exit 2 ;;
  esac
done
shift $((OPTIND - 1))
readonly all_urls curl_config

# Validate arguments.
#
# Each failing branch ends in "false" so that the if statement as a whole
# returns non-zero and triggers the shared usage-and-exit handler.
# ${curl_config-_} expands to "_" when curl_config is unset, so the -z
# test only fires when -K was given with an empty argument.
if (( $# < 1 )); then
  err 'source was not specified'
  false
elif [[ -z ${curl_config-_} ]]; then
  err 'path to curl config cannot be empty'
  false
fi || { usage; exit 2; }

# Keep these URLs synced with those in the XSLT stylesheet.
case $1 in
  ctan|tex|tex_ctan)
    url=https://ctan.org/mirrors ;;
  gentoo)
    url=https://gentoo.org/downloads/mirrors ;;
  gnu)
    url=https://gnu.org/prep/ftp.html ;;
  xorg)
    url=https://x.org/wiki/Releases/Download ;;
  *)
    err "invalid source '$1'"
    usage
    exit 2 ;;
esac
readonly url

if (( $# > 1 )); then
  warn "using source '$1'; ignoring extra arguments"
fi

# The real work.
#
# - Only pass --config to curl if -K was specified for this script
#   (http://mywiki.wooledge.org/BashFAQ/050). Take care to preserve this
#   when changing how the curl command line is built.
# - The XSLT stylesheet expects the "url" parameter to be a URL sans
#   protocol.
# - The awk script treats "all_urls" as a Boolean.
# Filter a list of URLs read from standard input, one per line.
#
# Arguments: $1 - Boolean for awk (empty or 0 = false).  When false, only
#                 one URL is kept per host name; when true, every distinct
#                 URL is kept.
# Outputs:   the selected ftp/http/https URLs, each ending in exactly one
#            slash, one per line.
# Returns:   the exit status of awk; 1 when no input lines were read.
filter_mirror_urls() {
  awk -F '/+' -v all_urls="$1" '
    /^(ftp|https?):/ {
      # Terminate with exactly one slash.
      sub("/*$", "/")

      # With -F "/+", $1 is the scheme plus colon and $2 is the host.
      # If all_urls is false, allow only one URL per FQDN,
      # favoring HTTPS over HTTP over FTP.
      key = all_urls ? $0 : $2
      if ($1 == "ftp:" && urls[key] ~ /^https?:/) next
      if ($1 == "http:" && urls[key] ~ /^https:/) next

      # A replaced entry takes the position of its latest occurrence.
      urls[key] = $0
      order[key] = count++
    }

    END {
      # There should always be *some* input.
      if (!NR) exit 1

      # Respect the ordering of the original list.
      for (key in order)
        orderedurls[order[key]] = urls[key]

      # Positions of replaced entries are gaps, hence the "in" test.
      for (i = 0; i < count; ++i)
        if (i in orderedurls) printf " %s\n", orderedurls[i]
    }'
}

# Build the curl command line as an array instead of going through eval
# (http://mywiki.wooledge.org/BashFAQ/050).  --config is added only when
# curl_config is set, i.e. when -K was given to this script.
curl_args=(--compressed --location --silent --show-error)
if [[ -n ${curl_config+set} ]]; then
  curl_args+=(--config "$curl_config")
fi

# Fetch the page, let the XSLT stylesheet process it (it receives the URL
# sans protocol as its "url" parameter), then filter the result.  The
# exit status of the pipeline is that of the awk filter.
curl "${curl_args[@]}" -- "$url" \
  | xsltproc --html --stringparam url "${url#*://}" "$script.xslt" - \
  | filter_mirror_urls "$all_urls"