#!/bin/sh
# eget - simply shell on wget for loading directories over http (wget does not support wildcard for http)
# Use:
# eget http://ftp.altlinux.ru/pub/security/ssl/*
#
# Copyright (C) 2014-2014, 2016, 2020, 2022  Etersoft
# Copyright (C) 2014 Daniil Mikhailov <danil@etersoft.ru>
# Copyright (C) 2016-2017, 2020, 2022 Vitaly Lipatov <lav@etersoft.ru>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

fatal()
{
    echo "FATAL: $*" >&2
    exit 1
}


# TODO:
arch="$(uname -m)"

# copied from eepm project

# copied from /etc/init.d/outformat (ALT Linux)
isatty()
{
	# Set a sane TERM required for tput
	[ -n "$TERM" ] || TERM=dumb
	export TERM
	test -t 1
}

isatty2()
{
	# check stderr
	test -t 2
}


check_tty()
{
	isatty || return
	which tput >/dev/null 2>/dev/null || return
	# FreeBSD does not support tput -S
	echo | tput -S >/dev/null 2>/dev/null || return
	[ -z "$USETTY" ] || return
	export USETTY=1
}

: ${BLACK:=0} ${RED:=1} ${GREEN:=2} ${YELLOW:=3} ${BLUE:=4} ${MAGENTA:=5} ${CYAN:=6} ${WHITE:=7}

set_boldcolor()
{
	[ "$USETTY" = "1" ] || return
	{
		echo bold
		echo setaf $1
	} |tput -S
}

restore_color()
{
	[ "$USETTY" = "1" ] || return
	{
		echo op; # set Original color Pair.
		echo sgr0; # turn off all special graphics mode (bold in our case).
	} |tput -S
}

echover()
{
    [ -n "$verbose" ] || return
    echo "$*" >&2
}

# Print command line and run command line
showcmd()
{
	if [ -z "$quiet" ] ; then
		set_boldcolor $GREEN
		local PROMTSIG="\$"
		[ "$UID" = 0 ] && PROMTSIG="#"
		echo " $PROMTSIG $@"
		restore_color
	fi >&2
}

# Print command line and run command line
docmd()
{
	showcmd "$@"
	"$@"
}


# copied from epm
# print a path to the command if exists in $PATH
if which which 2>/dev/null >/dev/null ; then
    # the best case if we have which command (other ways needs checking)
    # TODO: don't use which at all, it is binary, not builtin shell command
print_command_path()
{
    which -- "$1" 2>/dev/null
}
elif type -a type 2>/dev/null >/dev/null ; then
print_command_path()
{
    type -fpP -- "$1" 2>/dev/null
}
else
print_command_path()
{
    type "$1" 2>/dev/null | sed -e 's|.* /|/|'
}
fi

# check if <arg> is a real command
is_command()
{
    print_command_path "$1" >/dev/null
}



# check man glob
filter_glob()
{
	[ -z "$1" ] && cat && return
	# translate glob to regexp
	grep "$(echo "$1" | sed -e "s|\*|.*|g" -e "s|?|.|g")$"
}

filter_order()
{
    if [ -n "$SECONDLATEST" ] ; then
        sort -V | tail -n2 | head -n1
        return
    fi
    [ -z "$LATEST" ] && cat && return
    sort -V | tail -n1
}


is_fileurl()
{
    echo "$1" | grep -q "^/" && return
    echo "$1" | grep -q "file:/"
}

dir_from_url()
{
    echo "$1" | sed -e 's|^file://*|/|'
}

is_url()
{
    echo "$1" | grep -q ":/"
}



# args: cmd <URL> <options>
# will run cmd <options> <URL>
download_with_mirroring()
{
    local CMD="$1"
    shift
    local URL="$1"
    shift

    local res
    $CMD "$@" "$URL" && return
    res=$?
    [ -n "$CHECKMIRRORS" ] || return $res

    MIRROR="https://mirror.eterfund.ru"
    SECONDURL="$(echo "$URL" | sed -e "s|^.*://|$MIRROR/|")"
    $CMD "$@" "$SECONDURL" && URL="$SECONDURL" && return

    MIRROR="https://mirror.eterfund.org"
    SECONDURL="$(echo "$URL" | sed -e "s|^.*://|$MIRROR/|")"
    $CMD "$@" "$SECONDURL" && URL="$SECONDURL" && return
}



check_tty

WGETNOSSLCHECK=''
CURLNOSSLCHECK=''
WGETUSERAGENT=''
CURLUSERAGENT=''
WGETQ='' #-q
CURLQ='' #-s
WGETNAMEOPTIONS='--content-disposition'
CURLNAMEOPTIONS='--remote-name --remote-header-name'

LISTONLY=''
CHECKURL=''
LATEST=''
SECONDLATEST=''
CHECKMIRRORS=''
TARGETFILE=''

set_quiet()
{
    WGETQ='-q'
    CURLQ='-s'
}


eget_help()
{
cat <<EOF

eget - wget like downloader wrapper with wildcard support in filename part of URL
Usage: eget [options] http://somesite.ru/dir/na*.log

Options:
    -q                        - quiet mode
    -k|--no-check-certificate - skip SSL certificate chain support
    -U|-A|--user-agent        - send browser like UserAgent
    -O-|-O -                  - output downloaded file to stdout
    -O file                   - download to this file
    --latest                  - print only latest version of a file
    --second-latest           - print only second to latest version of a file
    --allow-mirrors           - check mirrors if url is not accessible

    --list|--list-only        - print only URLs
    --check URL               - check if the URL is accessible (returns HTTP 200 OK)

Examples:
  $ eget http://ftp.somesite.ru/package-*.x64.tar
  $ eget http://ftp.somesite.ru/package *.tar
  $ eget https://github.com/owner/project package*.ext
  $ eget --list http://ftp.somesite.ru/package-*.tar
  $ eget --check http://ftp.somesite.ru/test
  $ eget --list http://download.somesite.ru 'package-*.tar.xz'
  $ eget --list --latest https://github.com/telegramdesktop/tdesktop/releases 'tsetup.*.tar.xz'

EOF
}


if [ -z "$1" ] ; then
    echo "eget - wget like downloader wrapper with wildcard support, uses wget or curl as backend" >&2
    echo "Run $0 --help to get help" >&2
    exit 1
fi


while [ -n "$1" ] ; do

    case "$1" in
        -h|--help)
            eget_help
            exit
            ;;
        -q)
            set_quiet
            ;;
        -k|--no-check-certificate)
            WGETNOSSLCHECK='--no-check-certificate'
            CURLNOSSLCHECK='-k'
            ;;
        -U|-A|--user-agent)
            user_agent="Mozilla/5.0 (X11; Linux $arch) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
            WGETUSERAGENT="-U '$user_agent'"
            CURLUSERAGENT="-A '$user_agent'"
            ;;
        --list|--list-only)
            LISTONLY="$1"
            set_quiet
            ;;
        --check)
            CHECKURL="$1"
            set_quiet
            ;;
        --latest)
            LATEST="$1"
            ;;
        --second-latest)
            SECONDLATEST="$1"
            ;;
        --check-mirrors)
            CHECKMIRRORS="$1"
            ;;
        -O)
            shift
            TARGETFILE="$1"
            ;;
        -O-)
            TARGETFILE="-"
            ;;
        *)
            break
            ;;
    esac
    shift
done




WGET="$(print_command_path wget)"

if is_fileurl "$1" ; then

# put remote content to stdout
scat()
{
    local URL="$1"
    cat "$(dir_from_url "$URL")"
}
# download to default name of to $2
sget()
{
    local URL="$1"
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$URL"
       return
    elif [ -n "$2" ] ; then
       cp -av "$(dir_from_url "$URL")" "$2"
       return
    fi
    cp -av "$(dir_from_url "$URL")" .
}

check_url_is_accessible()
{
    local URL="$1"
    test -f "$(dir_from_url "$URL")"
}

elif [ -n "$WGET" ] ; then
__wget()
{
    if [ -n "$WGETUSERAGENT" ] ; then
        docmd $WGET $WGETQ $WGETNOSSLCHECK "$WGETUSERAGENT" "$@"
    else
        docmd $WGET $WGETQ $WGETNOSSLCHECK "$@"
    fi
}

# put remote content to stdout
scat()
{
    local URL="$1"
    download_with_mirroring __wget "$URL" -O-
}
# download to default name of to $2
sget()
{
    local URL="$1"
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$URL"
       return
    elif [ -n "$2" ] ; then
       download_with_mirroring __wget "$URL" -O "$2"
       return
    fi
# TODO: поддержка rsync для известных хостов?
# Не качать, если одинаковый размер и дата
# -nc
# TODO: overwrite always
    download_with_mirroring __wget "$URL" $WGETNAMEOPTIONS
}

check_url_is_accessible()
{
    local URL="$1"
    __wget --spider -S "$URL" 2>&1 | grep "HTTP/" | tail -n1 | grep -q "200"
}

else
CURL="$(print_command_path curl)"
[ -n "$CURL" ] || fatal "There are no wget nor curl in the system. Install it with $ epm install curl"
__curl()
{
    if [ -n "$CURLUSERAGENT" ] ; then
        docmd $CURL --fail -L $CURLQ "$CURLUSERAGENT" $CURLNOSSLCHECK "$@"
    else
        docmd $CURL --fail -L $CURLQ $CURLNOSSLCHECK "$@"
    fi
}
# put remote content to stdout
scat()
{
    local URL="$1"
    download_with_mirroring __curl "$URL" --output -
}
# download to default name of to $2
sget()
{
    local URL="$1"
    local res
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$1"
       return
    elif [ -n "$2" ] ; then
       download_with_mirroring __curl "$URL" --output "$2"
       return
    fi

    download_with_mirroring __curl "$URL" $CURLNAMEOPTIONS
}

check_url_is_accessible()
{
    local URL="$1"
    __curl -LI "$URL" 2>&1 | grep "HTTP/" | tail -n1 | grep -q -w "200\|404"
}

fi



get_github_urls()
{
    # https://github.com/OWNER/PROJECT
    local owner="$(echo "$1" | sed -e "s|^https://github.com/||" -e "s|/.*||")" #"
    local project="$(echo "$1" | sed -e "s|^https://github.com/$owner/||" -e "s|/.*||")" #"
    [ -n "$owner" ] || fatal "Can't get owner from $1"
    [ -n "$project" ] || fatal "Can't get project from $1"
    local URL="https://api.github.com/repos/$owner/$project/releases"
    scat $URL | \
        grep -i -o -E '"browser_download_url": "https://.*"' | cut -d'"' -f4
}

# drop file path from URL
get_host_only()
{
    echo "$1/" | grep -Eo '(.*://[^/]+)'
}

# Args: URL filename
make_fileurl()
{
    local url="$1"
    local fn="$2"

    fn="$(echo "$fn" | sed -e 's|^./||' -e 's|^/+||')"

    if is_fileurl "$url" ; then
        echo "$url/$fn"
        return
    fi

    # if there is file path from the root of the site
    if echo "$fn" | grep -q "^/" ; then
        echo "$(get_host_only "$url")$fn"
        return
    fi

    # if there is no slash in the end of URL
    if echo "$url" | grep -q -v "/$" ; then
        echo "$(dirname "$url" | sed -e 's|/*$||')/$fn"
        return
    fi

    # workaround for a slash in the end of URL
    echo "$(echo "$url" | sed -e 's|/*$||')/$fn"
}

get_urls()
{
    if is_fileurl "$URL" ; then
        ls -1 "$(dir_from_url "$URL")"
        return
    fi

    # cat html, divide to lines by tags and cut off hrefs only
    scat $URL | sed -e 's|<|<\n|g' -e 's|data-file=|href=|g' | \
         grep -i -o -E 'href="(.+)"' | cut -d'"' -f2
}



if [ -n "$CHECKURL" ] ; then
    set_quiet
    check_url_is_accessible "$1"
    exit
fi


# separate part for github downloads
if echo "$1" | grep -q "^https://github.com/" && \
   echo "$1" | grep -q -v "/download/" && [ -n "$2" ] ; then
    MASK="$2"

    if [ -n "$LISTONLY" ] ; then
        get_github_urls "$1" | filter_glob "$MASK" | filter_order
        exit
    fi

    ERROR=0
    for fn in $(get_github_urls "$1" | filter_glob "$MASK" | filter_order) ; do
        sget "$fn" "$TARGETFILE" || ERROR=1
        [ -n "$TARGETFILE" ] && [ "$ERROR" = "0" ] && break
    done
    exit
fi


# if mask is the second arg
if [ -n "$2" ] ; then
    URL="$1"
    MASK="$2"
else
    # do not support / at the end without separately specified mask
    if echo "$1" | grep -q "/$" ; then
        fatal "Use http://example.com/e/* to download all files in dir"
    fi

    # drop mask part
    URL="$(dirname "$1")/"
    # wildcards allowed only in the last part of path
    MASK=$(basename "$1")
fi

if echo "$URL" | grep -q "[*?]" ; then
    fatal "Error: there are globbing symbols (*?) in $URL. It is allowed only for mask part"
fi

is_url "$MASK" && fatal "eget supports only one URL as argument"
[ -n "$3" ] && fatal "too many args: extra '$3'. May be you need use quotes for arg with wildcards."

# TODO: curl?
# If ftp protocol, just download
if echo "$URL" | grep -q "^ftp://" ; then
    [ -n "$LISTONLY" ] && fatal "TODO: list files for ftp:// is not supported yet"
    sget "$1" "$TARGETFILE"
    exit
fi


if [ -n "$LISTONLY" ] ; then
    for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
        is_url "$fn" && echo "$fn" && continue
        make_fileurl "$URL" "$fn"
    done
    exit
fi

# If there is no wildcard symbol like asterisk, just download
if echo "$MASK" | grep -qv "[*?]" || echo "$MASK" | grep -q "[?].*="; then
    if is_fileurl "$1" ; then
        sget "$(dir_from_url "$1")" "$TARGEFILE"
        exit
    fi
    sget "$1" "$TARGETFILE"
    exit
fi

ERROR=0
for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
    is_url "$fn" || fn="$(make_fileurl "$URL" "$fn" )" #"
    sget "$fn" "$TARGETFILE" || ERROR=1
    [ -n "$TARGETFILE" ] && [ "$ERROR" = "0" ] && break
done
exit $ERROR