#!/bin/sh
# eget - simple wrapper over wget for downloading files by mask over http
# (wget does not support wildcards for http)
# Use:
# eget http://ftp.altlinux.ru/pub/security/ssl/*
#
# Copyright (C) 2014, 2016, 2020, 2022 Etersoft
# Copyright (C) 2014 Daniil Mikhailov <danil@etersoft.ru>
# Copyright (C) 2016-2017, 2020, 2022 Vitaly Lipatov <lav@etersoft.ru>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

fatal()
{
    echo "FATAL: $*" >&2
    exit 1
}

# TODO:
arch="$(uname -m)"

# copied from eepm project

# copied from /etc/init.d/outformat (ALT Linux)
isatty()
{
    # Set a sane TERM required for tput
    [ -n "$TERM" ] || TERM=dumb
    export TERM
    test -t 1
}

isatty2()
{
    # check stderr
    test -t 2
}

check_tty()
{
    isatty || return
    which tput >/dev/null 2>/dev/null || return
    # FreeBSD does not support tput -S
    echo | tput -S >/dev/null 2>/dev/null || return
    [ -z "$USETTY" ] || return
    export USETTY=1
}

: ${BLACK:=0} ${RED:=1} ${GREEN:=2} ${YELLOW:=3} ${BLUE:=4} ${MAGENTA:=5} ${CYAN:=6} ${WHITE:=7}

set_boldcolor()
{
    [ "$USETTY" = "1" ] || return
    {
        echo bold
        echo setaf $1
    } | tput -S
}

restore_color()
{
    [ "$USETTY" = "1" ] || return
    {
        echo op;   # set Original color Pair
        echo sgr0; # turn off all special graphics mode (bold in our case)
    } | tput -S
}

echover()
{
    [ -n "$verbose" ] || return
    echo "$*" >&2
}

# Print the command line (to stderr) before running it
showcmd()
{
    if [ -z "$quiet" ] ; then
        set_boldcolor $GREEN
        local PROMTSIG="\$"
        [ "$UID" = 0 ] && PROMTSIG="#"
        echo " $PROMTSIG $@"
        restore_color
    fi >&2
}

# Print the command line, then run it
docmd()
{
    showcmd "$@"
    "$@"
}

# copied from epm
# print a path to the command if it exists in $PATH
if which which 2>/dev/null >/dev/null ; then
    # the best case: we have the which command (other ways need checking)
    # TODO: don't use which at all, it is a binary, not a shell builtin
    print_command_path()
    {
        which -- "$1" 2>/dev/null
    }
elif type -a type 2>/dev/null >/dev/null ; then
    print_command_path()
    {
        type -fpP -- "$1" 2>/dev/null
    }
else
    print_command_path()
    {
        type "$1" 2>/dev/null | sed -e 's|.* /|/|'
    }
fi

# check if <arg> is a real command
is_command()
{
    print_command_path "$1" >/dev/null
}

# filter stdin lines by a glob pattern (see man 7 glob)
filter_glob()
{
    [ -z "$1" ] && cat && return
    # translate the glob to a regexp
    grep "$(echo "$1" | sed -e "s|\*|.*|g" -e "s|?|.|g")$"
}

filter_order()
{
    if [ -n "$SECONDLATEST" ] ; then
        sort -V | tail -n2 | head -n1
        return
    fi
    [ -z "$LATEST" ] && cat && return
    sort -V | tail -n1
}

is_fileurl()
{
    echo "$1" | grep -q "^/" && return
    echo "$1" | grep -q "file:/"
}

dir_from_url()
{
    echo "$1" | sed -e 's|^file://*|/|'
}

is_url()
{
    echo "$1" | grep -q ":/"
}

# args: cmd <URL> <options>
# will run cmd <options> <URL>
download_with_mirroring()
{
    local CMD="$1"
    shift
    local URL="$1"
    shift

    local res
    $CMD "$@" "$URL" && return
    res=$?
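
    # First attempt failed: optionally retry via the known mirrors below.
    # A mirror URL is built by replacing the scheme and host of the original
    # URL with the mirror host, so (with a hypothetical source host)
    #   https://example.com/pub/file.tar
    # becomes
    #   https://mirror.eterfund.ru/example.com/pub/file.tar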
[ -n "$CHECKMIRRORS" ] || return $res MIRROR="https://mirror.eterfund.ru" SECONDURL="$(echo "$URL" | sed -e "s|^.*://|$MIRROR/|")" $CMD "$@" "$SECONDURL" && URL="$SECONDURL" && return MIRROR="https://mirror.eterfund.org" SECONDURL="$(echo "$URL" | sed -e "s|^.*://|$MIRROR/|")" $CMD "$@" "$SECONDURL" && URL="$SECONDURL" && return } check_tty WGETNOSSLCHECK='' CURLNOSSLCHECK='' WGETUSERAGENT='' CURLUSERAGENT='' WGETQ='' #-q CURLQ='' #-s WGETNAMEOPTIONS='--content-disposition' CURLNAMEOPTIONS='--remote-name --remote-header-name' LISTONLY='' CHECKURL='' LATEST='' SECONDLATEST='' CHECKMIRRORS='' TARGETFILE='' set_quiet() { WGETQ='-q' CURLQ='-s' } eget_help() { cat <<EOF eget - wget like downloader wrapper with wildcard support in filename part of URL Usage: eget [options] http://somesite.ru/dir/na*.log Options: -q - quiet mode -k|--no-check-certificate - skip SSL certificate chain support -U|-A|--user-agent - send browser like UserAgent -O-|-O - - output downloaded file to stdout -O file - download to this file --latest - print only latest version of a file --second-latest - print only second to latest version of a file --allow-mirrors - check mirrors if url is not accessible --list|--list-only - print only URLs --check URL - check if the URL is accessible (returns HTTP 200 OK) Examples: $ eget http://ftp.somesite.ru/package-*.x64.tar $ eget http://ftp.somesite.ru/package *.tar $ eget https://github.com/owner/project package*.ext $ eget --list http://ftp.somesite.ru/package-*.tar $ eget --check http://ftp.somesite.ru/test $ eget --list http://download.somesite.ru 'package-*.tar.xz' $ eget --list --latest https://github.com/telegramdesktop/tdesktop/releases 'tsetup.*.tar.xz' EOF } if [ -z "$1" ] ; then echo "eget - wget like downloader wrapper with wildcard support, uses wget or curl as backend" >&2 echo "Run $0 --help to get help" >&2 exit 1 fi while [ -n "$1" ] ; do case "$1" in -h|--help) eget_help exit ;; -q) set_quiet ;; -k|--no-check-certificate) WGETNOSSLCHECK='--no-check-certificate' CURLNOSSLCHECK='-k' ;; -U|-A|--user-agent) user_agent="Mozilla/5.0 (X11; Linux $arch) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" WGETUSERAGENT="-U '$user_agent'" CURLUSERAGENT="-A '$user_agent'" ;; --list|--list-only) LISTONLY="$1" set_quiet ;; --check) CHECKURL="$1" set_quiet ;; --latest) LATEST="$1" ;; --second-latest) SECONDLATEST="$1" ;; --check-mirrors) CHECKMIRRORS="$1" ;; -O) shift TARGETFILE="$1" ;; -O-) TARGETFILE="-" ;; *) break ;; esac shift done WGET="$(print_command_path wget)" if is_fileurl "$1" ; then # put remote content to stdout scat() { local URL="$1" cat "$(dir_from_url "$URL")" } # download to default name of to $2 sget() { local URL="$1" if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then scat "$URL" return elif [ -n "$2" ] ; then cp -av "$(dir_from_url "$URL")" "$2" return fi cp -av "$(dir_from_url "$URL")" . } check_url_is_accessible() { local URL="$1" test -f "$(dir_from_url "$URL")" } elif [ -n "$WGET" ] ; then __wget() { if [ -n "$WGETUSERAGENT" ] ; then docmd $WGET $WGETQ $WGETNOSSLCHECK "$WGETUSERAGENT" "$@" else docmd $WGET $WGETQ $WGETNOSSLCHECK "$@" fi } # put remote content to stdout scat() { local URL="$1" download_with_mirroring __wget "$URL" -O- } # download to default name of to $2 sget() { local URL="$1" if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then scat "$URL" return elif [ -n "$2" ] ; then download_with_mirroring __wget "$URL" -O "$2" return fi # TODO: поддержка rsync для известных хостов? 
        # Do not download if the size and date are the same
        # -nc
        # TODO: overwrite always
        download_with_mirroring __wget "$URL" $WGETNAMEOPTIONS
    }

    check_url_is_accessible()
    {
        local URL="$1"
        __wget --spider -S "$URL" 2>&1 | grep "HTTP/" | tail -n1 | grep -q "200"
    }

else
    CURL="$(print_command_path curl)"
    [ -n "$CURL" ] || fatal "There is neither wget nor curl in the system. Install one with: epm install curl"

    __curl()
    {
        if [ -n "$CURLUSERAGENT" ] ; then
            docmd $CURL --fail -L $CURLQ "$CURLUSERAGENT" $CURLNOSSLCHECK "$@"
        else
            docmd $CURL --fail -L $CURLQ $CURLNOSSLCHECK "$@"
        fi
    }
    # put remote content to stdout
    scat()
    {
        local URL="$1"
        download_with_mirroring __curl "$URL" --output -
    }
    # download to the default name or to $2
    sget()
    {
        local URL="$1"
        if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
            scat "$URL"
            return
        elif [ -n "$2" ] ; then
            download_with_mirroring __curl "$URL" --output "$2"
            return
        fi
        download_with_mirroring __curl "$URL" $CURLNAMEOPTIONS
    }

    check_url_is_accessible()
    {
        local URL="$1"
        __curl -LI "$URL" 2>&1 | grep "HTTP/" | tail -n1 | grep -q -w "200\|404"
    }
fi

get_github_urls()
{
    # https://github.com/OWNER/PROJECT
    local owner="$(echo "$1" | sed -e "s|^https://github.com/||" -e "s|/.*||")" #"
    local project="$(echo "$1" | sed -e "s|^https://github.com/$owner/||" -e "s|/.*||")" #"
    [ -n "$owner" ] || fatal "Can't get the owner from $1"
    [ -n "$project" ] || fatal "Can't get the project from $1"
    local URL="https://api.github.com/repos/$owner/$project/releases"
    scat $URL | \
        grep -i -o -E '"browser_download_url": "https://.*"' | cut -d'"' -f4
}

# drop the file path from the URL (keep scheme and host only)
get_host_only()
{
    echo "$1/" | grep -Eo '(.*://[^/]+)'
}

# Args: URL filename
make_fileurl()
{
    local url="$1"
    local fn="$2"

    fn="$(echo "$fn" | sed -e 's|^./||' -e 's|^/+||')"

    if is_fileurl "$url" ; then
        echo "$url/$fn"
        return
    fi

    # if the file path starts from the root of the site
    if echo "$fn" | grep -q "^/" ; then
        echo "$(get_host_only "$url")$fn"
        return
    fi

    # if there is no slash at the end of the URL
    if echo "$url" | grep -q -v "/$" ; then
        echo "$(dirname "$url" | sed -e 's|/*$||')/$fn"
        return
    fi

    # workaround for a slash at the end of the URL
    echo "$(echo "$url" | sed -e 's|/*$||')/$fn"
}

get_urls()
{
    if is_fileurl "$URL" ; then
        ls -1 "$(dir_from_url "$URL")"
        return
    fi

    # cat the html, split it into lines by tags and cut off hrefs only
    scat $URL | sed -e 's|<|<\n|g' -e 's|data-file=|href=|g' | \
        grep -i -o -E 'href="(.+)"' | cut -d'"' -f2
}

if [ -n "$CHECKURL" ] ; then
    set_quiet
    check_url_is_accessible "$1"
    exit
fi

# separate handling for github downloads
if echo "$1" | grep -q "^https://github.com/" && \
   echo "$1" | grep -q -v "/download/" && [ -n "$2" ] ; then
    MASK="$2"

    if [ -n "$LISTONLY" ] ; then
        get_github_urls "$1" | filter_glob "$MASK" | filter_order
        exit
    fi

    ERROR=0
    for fn in $(get_github_urls "$1" | filter_glob "$MASK" | filter_order) ; do
        sget "$fn" "$TARGETFILE" || ERROR=1
        [ -n "$TARGETFILE" ] && [ "$ERROR" = "0" ] && break
    done
    exit
fi

# if the mask is passed as the second arg
if [ -n "$2" ] ; then
    URL="$1"
    MASK="$2"
else
    # a trailing / is not supported without a separately specified mask
    if echo "$1" | grep -q "/$" ; then
        fatal "Use http://example.com/e/* to download all files in a dir"
    fi

    # drop the mask part
    URL="$(dirname "$1")/"
    # wildcards are allowed only in the last part of the path
    MASK=$(basename "$1")
fi

if echo "$URL" | grep -q "[*?]" ; then
    fatal "Error: there are globbing symbols (*?) in $URL. They are allowed only in the mask part"
fi

is_url "$MASK" && fatal "eget supports only one URL as an argument"
[ -n "$3" ] && fatal "Too many args: extra '$3'. Maybe you need to quote the arg with wildcards."
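
# At this point URL holds the directory part and MASK the wildcard part.
# Both invocations below are equivalent (example.com is a hypothetical host);
# quote the mask so the shell does not expand it locally:
#   eget 'http://example.com/dir/na*.log'
#   eget http://example.com/dir/ 'na*.log'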
# TODO: curl?
# For the ftp protocol, just download
if echo "$URL" | grep -q "^ftp://" ; then
    [ -n "$LISTONLY" ] && fatal "TODO: listing files for ftp:// is not supported yet"
    sget "$1" "$TARGETFILE"
    exit
fi

if [ -n "$LISTONLY" ] ; then
    for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
        is_url "$fn" && echo "$fn" && continue
        make_fileurl "$URL" "$fn"
    done
    exit
fi

# If there is no wildcard symbol like an asterisk, just download
# (a '?' followed by '=' is treated as a URL query string, not as a wildcard)
if echo "$MASK" | grep -qv "[*?]" || echo "$MASK" | grep -q "[?].*=" ; then
    if is_fileurl "$1" ; then
        sget "$(dir_from_url "$1")" "$TARGETFILE"
        exit
    fi
    sget "$1" "$TARGETFILE"
    exit
fi

ERROR=0
for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
    is_url "$fn" || fn="$(make_fileurl "$URL" "$fn")" #"
    sget "$fn" "$TARGETFILE" || ERROR=1
    [ -n "$TARGETFILE" ] && [ "$ERROR" = "0" ] && break
done
exit $ERROR
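
# Typical wildcard usage with a hypothetical host:
#   $ eget --list 'http://example.com/pub/pkg-*.tar.gz'    # resolve the mask to full URLs
#   $ eget --latest 'http://example.com/pub/pkg-*.tar.gz'  # download only the newest version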