#!/bin/bash
#
# updatedb - extract telecast listings from captured teletext pages and
# append them to per-station database files.
#
# Usage: updatedb [directory]
#
# The working directory is taken from the first argument or, when no
# argument is given, from the TV2HTML_DIR environment variable.
#
# For every <station>.db file in that directory, all files matching
# <station>*[0-9] are scanned by gawk and the extracted records are
# appended to <station>.db.  Afterwards all *.db files are concatenated
# into All_stations.db.
#
# Each record written by the gawk program has the form
#   <date>~<hhmm>~<4 digits following the time>~<station>~<text>[~<text>...]
# NOTE(review): the 4 digits after the start time are presumably a VPS
# code, and the hidden date is presumably DDMMYY reordered to YYMMDD -
# confirm against real page dumps.
#
# Strict mode (set -e) is deliberately not enabled: a gawk failure for one
# station must not prevent the remaining stations from being processed.

if [[ -n "${1:-}" ]]; then
  TV2HTML_DIR=$1
fi

if [[ ! -d "${TV2HTML_DIR:-}" ]]; then
  echo 'Usage: updatedb <directory>' >&2
  exit 1
fi

printf 'This is updatedb, started on %s\n' "$(date)"

# The combined file is rebuilt from scratch at the end; remove it first so
# it is not picked up as a station database by the glob below.
rm -f -- "$TV2HTML_DIR/All_stations.db"

# With nullglob an unmatched pattern expands to nothing instead of staying
# a literal string that would then be used as a file name.
shopt -s nullglob
dbs=("$TV2HTML_DIR"/*.db)

for db in "${dbs[@]}"; do
  station=${db##*/}
  station=${station%.db}
  printf 'processing %s\n' "$station"

  pages=("$TV2HTML_DIR/$station"*[0-9])
  if (( ${#pages[@]} == 0 )); then
    # Without input files gawk would block reading standard input.
    printf 'updatedb: no page files found for %s\n' "$station" >&2
    continue
  fi

  # NOTE: the awk program is enclosed in single quotes, so it must not
  # contain any apostrophes (not even in comments).
  gawk -v "station=$station" '
{
  # Find hidden date.  An "x" is appended so that the trailing [^0-9]
  # can also match when the date is the last thing on the line; the same
  # padded string is used for the position lookup so pos is never 0.
  append_station = 0
  padded = $0 "x"
  if (match(padded, /{AL_MAGENTA}.*{CONCEAL.*[^0-9][0-9][0-9][0-9][0-9][0-9][0-9][^0-9]/)) {
    pos = match(padded, /[^0-9][0-9][0-9][0-9][0-9][0-9][0-9][^0-9]/)
    # Reverse the order of the three two-digit groups.
    date = substr($0, pos + 5, 2) substr($0, pos + 3, 2) substr($0, pos + 1, 2)
  } else if (match($0, /{AL_MAGENTA}{CONCEAL}[0-9][0-9][0-9][0-9][0-9][^0-9]/)) {
    # The text of this line is remembered and appended in parentheses
    # to the next telecast line that is written.
    append_station = 1
    dump = 0
  }

  if ($0 ~ /Satelliten-Empfang über ASTRA{GR_RED}/) {
    skippage = 1
  } else if ($0 ~ /bis.*[0-9][0-9]\.[0-9][0-9].*Uhr/ ||
             $0 ~ /^{GR_RED}{GCHR_12}{GCHR_12}.*>>/ ||
             $0 ~ /ca.*um.*[0-9].?\.[0-9][0-9].*Ende/ ||
             $0 ~ /^{PAGE [0-9][0-9][0-9]\/[0-9].*}$/ ||
             $0 ~ /[0-9]\.[0-9][0-9].*Sendeschluß/) {
    # Page header or end-of-listing marker: stop dumping and re-enable
    # processing for the next page.
    dump = 0
    skippage = 0
  } else if (!skippage && $0 ~ /^({[^}]*}| )[0-9][0-9]\.[0-9][0-9]( |{[^}]*})( |{[^}]*})[0-9][0-9][0-9][0-9]/) {
    # Find first line of telecast
    dump = 1
    stripline = $0
    gsub(/{[^}]*}/, " ", stripline)
    prefix = date "~" substr(stripline, 2, 2) substr(stripline, 5, 2) "~" substr(stripline, 9, 4) "~" station
    dumpprefix = 1
  }

  if (dump || append_station) {
    if (!date) {
      print "updatedb: Error: No date found." > "/dev/stderr"
      exit 1
    }
    stripline = $0

    # Find sign for PALplus
    gsub(/{GR_YELLOW}{GCHR_12}/, " «BB» ", stripline)

    # Replace symbols
    gsub(/{GR_YELLOW}{GCHR_63}/, " «UT» ", stripline)
    gsub(/{AL_YELLOW}\(16:9\)( *$|{AL_[A-Z]*})/, " «BB» ", stripline)
    gsub(/{AL_RED}UT( *$|{AL_[A-Z]*})/, " «UT» ", stripline)
    gsub(/{AL_YELLOW}d\/f( *$|{AL_[A-Z]*})/, " «ZK» ", stripline)
    gsub(/{AL_YELLOW}d\/spanisch( *$|{AL_[A-Z]*})/, " «ZK» ", stripline)
    gsub(/{AL_YELLOW}s\/w( *$|{AL_[A-Z]*})/, " «SW» ", stripline)
    gsub(/{AL_YELLOW}oo( *$|{AL_[A-Z]*})/, " «ZK» ", stripline)
    gsub(/{AL_YELLOW}II( *$|{AL_[A-Z]*})/, " «ZK» ", stripline)
    gsub(/{AL_YELLOW}DS( *$|{AL_[A-Z]*})/, " «DS» ", stripline)
    gsub(/{AL_YELLOW}DS oo( *$|{AL_[A-Z]*})/, " «DS» «ZK» ", stripline)
    gsub(/{AL_YELLOW}sw( *$|{AL_[A-Z]*})/, " «SW» ", stripline)
    gsub(/{AL_BLUE}Format 16:9/, " «BB» ", stripline)
    gsub(/{AL_GREEN}PAL PLUS/, " «BB» ", stripline)

    # Remove hidden parts of page: a conceal code opens a hidden span
    # ("["), any colour code (or the end of the line) closes it ("]").
    gsub(/{CONCEAL}/, "[", stripline)
    gsub(/{AL_[A-Z]*}/, "]", stripline)
    gsub(/{GR_[A-Z]*}/, "]", stripline)
    gsub(/{[^}]*}/, " ", stripline)
    stripline = stripline "]"
    while (match(stripline, /\[[^]]*\]/)) {
      # Blank the hidden span with exactly RLENGTH spaces so the
      # column positions used further down stay intact.
      sub(/\[[^]]*\]/, sprintf("%" RLENGTH "s", ""), stripline)
    }
    gsub(/\]/, " ", stripline)

    # Remove trailing whitespace
    gsub(/ *$/, "", stripline)

    # Remove links to other pages
    gsub(/\.\.\.* ?[0-9][0-9][0-9]$/, "", stripline)
    gsub(/>>>* ?[0-9][0-9][0-9]$/, "", stripline)

    # Skip titles in the middle of the page (arte): a line with three or
    # more letters within its first ten columns is discarded.
    tmp_str = substr(stripline, 1, 10)
    if (gsub(/[a-zA-Z]/, " ", tmp_str) >= 3) {
      stripline = ""
    }

    if (!append_station) {
      if (stripline != "") {
        if (dumpprefix) {
          # Page text is always passed as an argument, never as the
          # format string, so a "%" in a title cannot break printf.
          printf "\n%s", prefix
        }

        # Find sign for subtitles: a single "*" anywhere is replaced;
        # with several, only one at the end of the line is.
        tmpline = stripline
        if (gsub(/\*/, "«UT»", tmpline) > 1) {
          tmpline = stripline
          gsub(/\*$/, "«UT»", tmpline)
        }
        stripline = tmpline

        # Find sign for 2-channel sound (same rule as above)
        if (gsub(/°°/, "«ZK»", tmpline) > 1) {
          tmpline = stripline
          gsub(/°°$/, "«ZK»", tmpline)
        }

        # Drop the first 13 columns and one leading blank.
        tmpline = substr(tmpline, 14)
        gsub(/^ /, "", tmpline)

        # Write line for telecast to database
        printf "~%s", tmpline
        if (length(append_str)) {
          printf " (%s)", append_str
          append_str = ""
        }
      } else {
        # Skip everything after empty lines until next entry is found
        dump = 0
      }
      dumpprefix = 0
    } else {
      append_str = stripline
      gsub(/^ */, "", append_str)
    }
  }
}

END {
  # Terminate the last record.
  print ""
}
' "${pages[@]}" >> "$db"
done

# Rebuild the combined database from all station databases.
if (( ${#dbs[@]} > 0 )); then
  cat -- "${dbs[@]}" > "$TV2HTML_DIR/All_stations.db"
else
  printf 'updatedb: no station databases found in %s\n' "$TV2HTML_DIR" >&2
  : > "$TV2HTML_DIR/All_stations.db"
fi