#!/bin/bash
# 2009-2010, 2012, 2017, 2020 Etersoft www.etersoft.ru
# Author: Vitaly Lipatov <lav@etersoft.ru>
# Public domain

# TODO: rewrite with shell commands, perl or C
# Python - http://www.linuxtopia.org/online_books/programming_books/python_programming/python_ch16s03.html
# Shell  - http://linux.byexamples.com/archives/127/uniq-and-basic-set-theory/
#        - http://maiaco.com/articles/shellSetOperations.php
# Perl   - http://docstore.mik.ua/orelly/perl/cookbook/ch04_09.htm
#        - http://blogs.perl.org/users/polettix/2012/03/sets-operations.html
# http://rosettacode.org/wiki/Symmetric_difference
# TODO: add unit tests

# http://ru.wikipedia.org/wiki/Операции_над_множествами
# Base set operations:
#  * union
#    "1 2 3" "3 4 5" -> "1 2 3 4 5"
#  * intersection
#    "1 2 3" "3 4 5" -> "3"
#  * relative complement (subtracted, difference) ( A \ B – members in A but not in B )
#    http://en.wikipedia.org/wiki/Complement_%28set_theory%29
#    "1 3" "1 2 3 4" -> "2 4"   (as printed by exclude: second list without the items of the first)
#  * symmetric difference ( A ^ B – members in A or B but not both )
#    http://en.wikipedia.org/wiki/Symmetric_difference
#    "1 2 3" "3 4 5" -> "1 2 4 5"

# Print "FATAL: <message>" to stderr and terminate the script with status 1.
fatal()
{
    printf 'FATAL: %s\n' "$*" >&2
    exit 1
}

# Filter: squeeze every run of spaces on stdin to a single space and drop
# one leading and one trailing space from each line.
filter_strip_spaces()
{
    # possible use just
    #xargs echo
    # "  *" (space, space, star) is the portable spelling of the GNU-only " \+"
    sed -e 's|  *| |g' -e 's|^ ||' -e 's| $||'
}

# Print all arguments joined by spaces, with redundant spaces removed.
strip_spaces()
{
    # printf, not echo: a word such as "-n" or "-e" must be treated as data
    printf '%s\n' "$*" | filter_strip_spaces
}

# True when the arguments contain nothing but (zero or more) spaces.
is_empty()
{
    [ -z "$(strip_spaces "$*")" ]
}

# Alias for is_empty.
isempty()
{
    is_empty "$@"
}

# True when the arguments, joined by spaces, contain at least one space.
has_space()
{
    # pure shell test: no sed fork, and works in dash as well as bash
    case "$*" in
        *" "*)
            return 0
            ;;
    esac
    return 1
}

# Print every word of the arguments on its own line.
# The body runs in a subshell, so the noglob setting needed for safe word
# splitting never leaks into (or gets reset in) the caller's shell.
list()
(
    set -f
    # $@ is intentionally unquoted: the arguments are split into words
    for i in $@ ; do
        printf '%s\n' "$i"
    done
)

# Print the number of words in the arguments.
count()
(
    set -f
    # intentionally unquoted: re-split the arguments into separate words
    set -- $@
    printf '%s\n' "$#"
)

# Print the sorted list of unique words of the arguments, space separated.
union()
(
    set -f
    # intentionally unquoted: one word per line in, one argument per word out
    strip_spaces $(list $@ | sort -u)
)

# Args: LIST1 LIST2
# Print the words of LIST2 that are also present in LIST1, in LIST2 order.
# Duplicates in the input lists are not collapsed.
intersection()
(
    # noglob: items like "*" must be compared literally, not expanded
    set -f
    RES=""
    for i in $2 ; do
        for j in $1 ; do
            [ "$i" = "$j" ] && RES="$RES $i"
        done
    done
    strip_spaces "$RES"
)

# Alias for union.
uniq()
{
    # quoted, so that no glob expansion happens before union disables it
    union "$@"
}

# Args: PATTERN [string...]
# True when the strings, joined by spaces, match PATTERN (grep basic regex).
has()
{
    local wd="$1"
    shift
    printf '%s\n' "$*" | grep -q -- "$wd"
}

# Note: used egrep! write '[0-9]+(first|two)', not '[0-9]\+...'
# Args: PATTERN [string...]
# True when the strings, joined by spaces, match PATTERN, which is an
# extended regular expression (grep -E).
match()
{
    local wd="$1"
    shift
    printf '%s\n' "$*" | grep -E -q -- "$wd"
}

# Args: PATTERN LIST
# Print the words of LIST that do not match PATTERN as a whole
# (basic regex, anchored at both ends).
# reg_remove "1." "11 12 21 22" -> "21 22"
# The body runs in a subshell so the noglob setting stays private.
reg_remove()
(
    set -f
    RES=""
    for i in $2 ; do
        printf '%s\n' "$i" | grep -q -- "^$1\$" || RES="$RES $i"
    done
    strip_spaces "$RES"
)

# Args: PATTERN LIST
# Print the words of LIST that contain no whole-word match of PATTERN (grep -w).
# reg_wordremove "1." "11 12 21 22" -> "21 22"
reg_wordremove()
(
    set -f
    RES=""
    for i in $2 ; do
        printf '%s\n' "$i" | grep -q -w -- "$1" || RES="$RES $i"
    done
    strip_spaces "$RES"
)

# Args: WORD LIST
# Print LIST without the words that are exactly equal to WORD (no regex).
reg_rqremove()
(
    # noglob: LIST items such as "*" are data, not filename patterns
    set -f
    RES=""
    for i in $2 ; do
        [ "$i" = "$1" ] || RES="$RES $i"
    done
    strip_spaces "$RES"
)

# Args: LIST1 LIST2
# Print LIST2 without the items that are also contained in LIST1
# Example: exclude "1 3" "1 2 3 4" -> "2 4"
exclude()
(
    set -f
    RES="$2"
    for i in $1 ; do
        RES="$(reg_rqremove "$i" "$RES")"
    done
    strip_spaces "$RES"
)

# Args: PATTERNS LIST
# Print LIST without the words fully matched by any regex of PATTERNS.
# reg_exclude "22 1." "11 12 21 22" -> "21"
reg_exclude()
(
    set -f
    RES="$2"
    for i in $1 ; do
        RES="$(reg_remove "$i" "$RES")"
    done
    strip_spaces "$RES"
)

# Args: PATTERNS LIST
# Print LIST without the words that contain a whole-word match (grep -w)
# of any regex of PATTERNS.
# reg_wordexclude "wo.* er" "work were more else" -> "were more else"
reg_wordexclude()
(
    set -f
    RES="$2"
    for i in $1 ; do
        RES="$(reg_wordremove "$i" "$RES")"
    done
    strip_spaces "$RES"
)

# Args: WORD LIST
# True when LIST contains a word exactly equal to WORD.
# Runs in the caller's shell (no fork), so the caller's noglob state is
# saved and restored on every exit path.
if_contain()
{
    local i
    local found=1
    local had_noglob=""
    case "$-" in
        *f*)
            had_noglob=1
            ;;
    esac
    set -f
    for i in $2 ; do
        if [ "$i" = "$1" ] ; then
            found=0
            break
        fi
    done
    [ -n "$had_noglob" ] || set +f
    return $found
}

# Args: LIST1 LIST2
# Print the symmetric difference: words of LIST1 missing from LIST2,
# followed by words of LIST2 missing from LIST1.
# difference "1 2 3" "3 4 5" -> "1 2 4 5"
difference()
(
    set -f
    RES=""
    for i in $1 ; do
        if_contain "$i" "$2" || RES="$RES $i"
    done
    for i in $2 ; do
        if_contain "$i" "$1" || RES="$RES $i"
    done
    strip_spaces "$RES"
)

# FIXME:
# Args: PATTERN LIST
# Print only the words of LIST that contain a whole-word match of PATTERN (grep -w).
# reg_include "1." "11 12 21 22" -> "11 12"
reg_include()
(
    set -f
    RES=""
    for i in $2 ; do
        printf '%s\n' "$i" | grep -q -w -- "$1" && RES="$RES $i"
    done
    strip_spaces "$RES"
)

# Args: PATTERNS LIST
# True when at least one word of LIST contains a whole-word match (grep -w)
# of one of the space separated PATTERNS.
# The words are tested directly: comparing a filtered copy with the raw
# LIST reported a false match whenever LIST carried redundant spaces.
contains()
(
    #estrlist has "$1" "$2"
    set -f
    for pat in $1 ; do
        for word in $2 ; do
            printf '%s\n' "$word" | grep -q -w -- "$pat" && return 0
        done
    done
    return 1
)

# Args: COMMAND ARG1 [args...]
# Print the command line of a sample call of this script, then run it.
example()
{
    local CMD="$1"
    local ARG1="$2"
    shift 2
    printf '%s\n' "\$ $0 $CMD \"$ARG1\" \"$*\""
    # quoted: the path of the script may contain spaces
    "$0" "$CMD" "$ARG1" "$@"
}

# Same as example, and additionally print TRUE or FALSE according to the
# exit status of the sample call.
example_res()
{
    if example "$@" ; then
        echo TRUE
    else
        echo FALSE
    fi
}

# Print the usage text and run the built-in examples.
help()
{
    echo "estrlist developed for string list operations. See also cut, join, paste..."
    echo "Usage: $0 <command> [args]"
    echo "Commands:"
    echo " strip_spaces [args] - remove extra spaces"
# TODO: add filter
#    echo " filter_strip_spaces - remove extra spaces from words from standard input"
#    echo " reg_remove <PATTERN> [word list] - remove words containing a match to the given PATTERN (grep notation)"
#    echo " reg_wordremove <PATTERN> [word list] - remove words containing a match to the given PATTERN (grep -w notation)"
    echo " exclude <list1> <list2> - print list2 items exclude list1 items"
    echo " reg_exclude <list PATTERN> [word list] - print only words that do not match PATTERN"
#    echo " reg_wordexclude <list PATTERN> [word list] - print only words do not match PATTERN"
    echo " has <PATTERN> string - check the string for a match to the regular expression given in PATTERN (grep notation)"
    echo " match <PATTERN> string - check the string for a match to the regular expression given in PATTERN (egrep notation)"
    echo " isempty [string] (is_empty) - true if string has no any symbols (only zero or more spaces)"
    # has_space succeeds when a space IS present (the old text said the opposite)
    echo " has_space [string] - true if string has spaces"
    echo " union [word list] - sort and remove duplicates"
    echo " intersection <list1> <list2> - print only intersected items (the same in both lists)"
    echo " difference <list1> <list2> - symmetric difference between lists items (not in both lists)"
    echo " uniq [word list] - alias for union"
    echo " list [word list] - just list words line by line"
    echo " count [word list] - print word count"
    echo " contains <word> [word list] - check if word list contains the word"
    echo
    echo "Examples:"
#    example reg_remove "1." "11 12 21 22"
#    example reg_wordremove "1." "11 12 21 22"
    example exclude "1 3" "1 2 3 4"
    example reg_exclude "22 1." "11 12 21 22"
    example reg_wordexclude "wo.* er" "work were more else"
    example union "1 2 2 3 3"
    example_res contains "wo" "wo wor"
    example_res contains "word" "wo wor"
    example count "1 2 3 4 10"
    example_res isempty " "
    #example_res isempty " 1 "
    example_res has ex "exactly"
    example_res has exo "exactly"
    example_res match "M[0-9]+" "M250"
    example_res match "M[0-9]+" "MI"
}

# Entry point: the first argument selects the function to run.
COMMAND="$1"
if [ -z "$COMMAND" ] ; then
    echo "Run with --help for get command description." >&2
    exit 1
fi

if [ "$COMMAND" = "-h" ] || [ "$COMMAND" = "--help" ] ; then
    COMMAND="help"
fi

# These commands are still defined as helpers but are no longer offered.
case "$COMMAND" in
    reg_remove|reg_wordremove)
        fatal "obsoleted command $COMMAND"
        ;;
esac

shift

# A leading "--" means: take the following arguments literally, never read
# a list from stdin even when an argument is "-".
LITERAL_ARGS=""
if [ "$COMMAND" = "--" ] ; then
    # ignore all options (-)
    COMMAND="$1"
    shift
    LITERAL_ARGS=1
fi

# Dispatch only to functions defined in this script: an arbitrary external
# program named on the command line must never be executed.
declare -F -- "$COMMAND" >/dev/null || fatal "unknown command $COMMAND"

if [ -n "$LITERAL_ARGS" ] ; then
    "$COMMAND" "$@"
elif [ "$1" = "-" ] ; then
    # first list comes from stdin; the remaining arguments are appended to it
    shift
    "$COMMAND" "$(cat) $@"
elif [ "$2" = "-" ] ; then
    # second list comes from stdin
    "$COMMAND" "$1" "$(cat)"
else
    "$COMMAND" "$@"
fi