#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell

=pod

=head1 NAME

tv_grab_de_tvtoday - Grab TV listings for Germany (from www.tvtoday.de webpage).

=head1 SYNOPSIS

tv_grab_de_tvtoday --help

tv_grab_de_tvtoday [--config-file FILE] --configure [--gui OPTION]

tv_grab_de_tvtoday [--config-file FILE] [--output FILE] 
                   [--days N] [--offset N]
                   [--quiet] [--slow] [--nosqueezeout]

tv_grab_de_tvtoday --list-channels [--icons]

tv_grab_de_tvtoday --capabilities

tv_grab_de_tvtoday --version

=head1 DESCRIPTION

Output TV listings for several channels available in Germany.
The data comes from www.tvtoday.de which is the webpage of one of 
the most popular TV magazines in Germany. The grabber relies on
parsing HTML so it might stop working at any time.

First run B<tv_grab_de_tvtoday --configure> to choose, which channels 
you want to download. Then running B<tv_grab_de_tvtoday> with no 
arguments will output listings in XML format to standard output.

B<--configure> Ask for each available channel whether to download
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_de_tvtoday.conf>.  This is the file 
written by B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days.  The default is seven.

B<--offset N> Start N days in the future.  The default is to start
from today (= zero). Set to -1 to grab data beginning yesterday.

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--slow> enables long strategy run: tvtoday.de publishes only some (vital)
information on the actual listing pages, the rest is shown in a separate
popup window. If you'd like to parse the data from these popups as well,
supply this flag. But consider that the grab process takes much longer when
doing so, since many more web pages have to be retrieved.

B<--nosqueezeout> Disables aggressive squeeze out of information field from
the index pages. If specified, the program description from tvtoday.de is
passed trough as data of the desc-tag, otherwise the data is parsed for
information about actors, director, etc. and understood data gets returned in
it's corresponding field.

B<--list-channels> Write output giving <channel> elements for every
channel available (ignoring the config file), but no programmes.

B<--icons> Get the URL for channel-logos together with the channel-list.
Mind that this takes a long time, since a webpage has to be requested for
every channel.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://membled.com/twiki/bin/view/Main/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Stefan Siegl, stesie@brokenpipe.de. Inspired by tv_grab_fi by Matti Airas.

=head1 BUGS

If you happen to find a bug, you're requested to send a mail to me
at B<stesie@brokenpipe.de> or to one of the XMLTV mailing lists, see webpages
at http://sourceforge.net/projects/xmltv/.

=cut

use warnings;
use strict;
use XMLTV::Version '$Id: tv_grab_de_tvtoday.in,v 1.46 2006/07/20 20:28:29 stesie Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache share/;
use XMLTV::Description 'Germany (www.tvtoday.de)';
use Date::Manip;
use Getopt::Long;
use HTML::TreeBuilder;
use HTML::Entities;
use URI::Escape;
use XMLTV;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::DST;
use XMLTV::Config_file;
use XMLTV::Mode;
use XMLTV::Get_nice;
use XMLTV::Memoize;
use XMLTV::Usage <<END
$0: get German television listings from www.tvtoday.de in XMLTV format
To configure: $0 --configure [--config-file FILE] [--gui OPTION]
To grab data: $0 [--config-file FILE] [--output FILE] 
                 [--days N] [--offset N]
                 [--quiet] [--slow] [--nosqueezeout]
Channel List: $0 --list-channels [--icons]
To show capabilities: $0 --capabilities
To show version: $0 --version

END
  ;

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
        *t = sub {};
        *d = sub { '' };
    }
    else {
        *t = \&Log::TraceMessages::t;
        *d = \&Log::TraceMessages::d;
	#$Log::TraceMessages::On = 1;
    }
}

#-- our own prototypes first ...
sub grab_data($$$);
sub squeeze_out_desc($$);
sub refine_category_attr($$);
sub get_channels();
sub get_icons();
sub channel_id($);
sub split_up_names($$);
sub parse_date_data($);
sub get_page($);
sub add_credits($$@);
sub parse_page($$);
sub read_popup($$);
sub refine_credits($);

#-- Category-Matching RegExp 
our constant $category_regexp = '^(.*?\s+)?((?:[\w-]+-?)?(?:[Aa]genten|[Cc]harts|[Cc]omedy|[Dd]oku(?:mentar|mentation)?|Episoden|[Dd]rama|[Kk]rimi|[Kk]omdie|[Ll]iteratur|[Mm]agazin|[Mm]elodram|[Pp]ortrt|[Rr]eportage|[Rr]eihe|[Ss]oap|[Ss]atire|[Ss]erie|[Ss]tudie|[Tt]alk|[Tt]hriller|[Ww]unschclip|[Gg]esprch|[Tt]elenovela|[Zz]eichentrick|[Aa]nimation|[Kk]ultur|[Bb]ericht)-?(?:[Ff]ilm|[Mm]ovie|[Ss]how|[Ss]endung)?s?)([\s;,]+.*)?$';

#-- DEBUG FLUFF ...
my $debug = 0;
$XMLTV::Get_nice::Delay = 0 if($debug);

#-- attributes of xmltv root element
my $head = { 
    'source-data-url' => 'http://programm.tvtoday.de/tv/programm/programm.php',
    'source-info-url' => 'http://www.tvtoday.de/',
    'generator-info-name'  => 'XMLTV',
    'generator-info-url'   => 'http://membled.com/work/apps/xmltv/',
};

#-- the timezone tvtoday.de lives in is, CET/CEST
my constant $TZ = "+0100";
my constant $lang = "de";

#-- Parse argv now.  First do undocumented --cache option.
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');

my $opt_configure;
my $opt_config_file;
my $opt_gui;
my $opt_output;
my $opt_days;
my $opt_offset = 0;
my $opt_quiet = 0;
my $opt_slow = 0;
my $opt_nosqueeze = 0;
my $opt_list_channels;
my $opt_icons = 0;
my $opt_help;
my $opt_share;

GetOptions(
    'configure'      => \$opt_configure,
    'config-file=s'  => \$opt_config_file,
    'gui:s'          => \$opt_gui,
    'output=s'       => \$opt_output,
    'days=i'         => \$opt_days,
    'offset=i'       => \$opt_offset,
    'quiet'          => \$opt_quiet,
    'slow'           => \$opt_slow,
    'nosqueezeout'   => \$opt_nosqueeze,
    'list-channels'  => \$opt_list_channels,
    'icons'          => \$opt_icons,
    'help'           => \$opt_help,
    'share=s'        => \$opt_share,
) or usage(0);

usage(1) if $opt_help;

XMLTV::Ask::init($opt_gui);

#-- make sure offset+days arguments are within range
die "offset mustn't be larger than six"
  if($opt_offset > 6);

warn "cannot fetch data before yesterday, starting yesterday", $opt_offset = -1
  if($opt_offset < -1);
$opt_days = 7 - $opt_offset unless (defined($opt_days));

die "fetching more than seven days isn't possible, check offset+days arguments"
  if($opt_days + $opt_offset > 7);

#-- offset and days should be valid now, let's go on ...



my $mode = XMLTV::Mode::mode('grab', # default value
    $opt_configure 	=> 'configure',
    $opt_list_channels	=> 'list-channels',
);



#-- initialize config file support
my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_de_tvtoday', $opt_quiet);
my @config_lines;

if($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);
} 
elsif($mode eq 'grab' || $mode eq 'list-channels') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
} 
else { die("never heard of XMLTV mode $mode, sorry :-(") }

my $bar = new XMLTV::ProgressBar( 'getting list of channels', 1 )
    if not $opt_quiet;

#-- hey, we can't live without channel data, so let's get that now!
my %channels = get_channels();
$bar->update() if not $opt_quiet;
$bar->finish() if not $opt_quiet;

#-- if wanted, get the channel logos (only in list-channels-mode done here!)
my %icons;
%icons = get_icons() if $opt_icons && $opt_list_channels;


# share/ directory for storing channel mapping files.  This next line
# is altered by processing through tv_grab_de_tvtoday.PL.  But we can
# use the current directory instead of share/tv_grab_de_tvtoday for
# development.
#
# The 'source' file tv_grab_de_tvtoday.in has $SHARE_DIR undef, which
# means use the current directory.  In any case the directory can be
# overridden with the --share option (useful for testing).
#
my $SHARE_DIR='/usr/share/xmltv'; # by grab/de_tvtoday/tv_grab_de_tvtoday.PL
$SHARE_DIR = $opt_share if defined $opt_share;
my $OUR_SHARE_DIR = (defined $SHARE_DIR) ? "$SHARE_DIR/tv_grab_de_tvtoday" : '.';

# Read the file with channel mappings.
(my $CHANNEL_NAMES_FILE = "$OUR_SHARE_DIR/channel_ids") =~ tr!/!/!s;
my (%chid_mapping, %seen);
my $line_num = 0;
foreach (XMLTV::Config_file::read_lines($CHANNEL_NAMES_FILE, 1)) {
    ++ $line_num;
    next unless defined;
    my $where = "$CHANNEL_NAMES_FILE:$line_num";
    my @fields = split m/:/;
    die "$where: wrong number of fields"
      if @fields != 2;

    my ($xmltv_id, $tvtoday_id) = @fields;
    warn "$where: tvtoday id $tvtoday_id seen already\n"
      if defined $chid_mapping{$tvtoday_id};
    $chid_mapping{$tvtoday_id} = $xmltv_id;
    warn "$where: XMLTV id $xmltv_id seen already\n"
      if $seen{$xmltv_id}++;
}

my @requests;

#-- read our configuration file now
my $line = 1;
foreach(@config_lines) {
    $line ++;
    next unless defined;

    if (/^channel:?\s+(\S+)/) {
	warn("\nConfigured channel $1 not available anymore. \nPlease reconfigure tv_grab_de_tvtoday.\n"),
	  next unless(defined($channels{$1}));
	push @requests, $1;
    } 
    elsif (/^map:?\s+(\S+)\s+(\S+)/) {
	# Override anything set in the channel_ids file.
	$chid_mapping{$1} = $2;
    } 
    else {
	warn "$config_file:$line: bad line\n";
    }
}



#-- if we're requested to do so, write out a new config file ...
if ($mode eq 'configure') {
    open(CONFIG, ">$config_file") or die("cannot write to $config_file, due to: $!");

    #-- now let's annoy the user, sorry, I meant ask ..
    my @chs = sort keys %channels;
    my @names = map { $channels{$_} } @chs;
    my @qs = map { "add channel $_?" } @names;
    my @want = ask_many_boolean(1, @qs);

    foreach (@chs) {
	my $w = shift @want;
	my $chname = shift @names;
	
	warn("cannot read input, stopping to ask questions ..."), last if not defined $w;

	print CONFIG '#' if not $w; #- comment line out if user answer 'no'

	# shall we store the display name in the config file?
	# leave it in, since it probably makes it a lot easier for the
	# user to choose which channel to comment/uncommet - when manually
	# viing the config file -- are there people who do that?
	print CONFIG "channel $_ #$chname\n";
    }

    close CONFIG or warn "unable to nicely close the config file: $!";
    say("Finished configuration.");

    exit();
}



#-- well, we don't have to write a config file, so, probably it's some xml stuff :)
#-- if not, let's go dying ...
die unless($mode eq 'grab' or $mode eq 'list-channels');

my %writer_args;
if (defined $opt_output) {
    my $handle = new IO::File(">$opt_output");
    die "cannot write to output file, $opt_output: $!" unless (defined $handle);
    $writer_args{'OUTPUT'} = $handle;
}

$writer_args{'encoding'} = 'ISO-8859-1';

if( defined( $opt_days ) ) {
  $writer_args{offset} = $opt_offset;
  $writer_args{days} = $opt_days;
  $writer_args{cutoff} = "000000";
}

#-- create our writer object
my $writer = new XMLTV::Writer(%writer_args);
$writer->start($head);

if ($mode eq 'list-channels') {
    foreach (keys %channels) {
        my %channel = ('id'           => channel_id($_), 
                       'display-name' => [[$channels{$_}, $lang]]); 
        $channel{'icon'} = [{'src' => "http://www.tvtoday.de" . $icons{$_}}] 
          if(defined($icons{$_}));
        $writer->write_channel(\%channel);
    }

    $writer->end();
    exit();
}



#-- there's only one thing, why we might exist: write out tvdata!
die unless ($mode eq 'grab');
die "No channels specified, run me with --configure flag\n" unless(scalar(@requests));


#--  We need to wait with writing the channels, therefore buffer the program-infos
my @writebuffer;

#-- get <programme> tags
my $numdays = $opt_days + $opt_offset - 1;
  
$bar = new XMLTV::ProgressBar('grabbing', scalar(@requests) * $opt_days)
  if not $opt_quiet;

foreach my $channel (@requests) {
    for (my $day = $opt_offset; $day <= $numdays; $day ++) {
	grab_data($channel, $day, $day == $numdays);
	update $bar if not $opt_quiet;
    }
}
$bar->finish()
    if not $opt_quiet;

#-- write out <channel> tags
foreach(@requests) {
    my $id = channel_id($_);
    my %channel = ('id'           => $id, 
                   'display-name' => [[$channels{$_}, $lang]]); 
    $channel{'icon'} = [{'src' => "http://www.tvtoday.de" . $icons{$id}}] 
      if(defined($icons{$id}));
    $writer->write_channel(\%channel);
}

#-- write out <program> tags
$writer->write_programme($_) foreach(@writebuffer);

#-- hey, looks like we've finished ...
$writer->end();



#-- channel_id($s) :: turn site channel id into an xmltv id
sub channel_id($) {
    for (my $s = shift) {
	$_ = lc(defined($chid_mapping{$_}) ? $chid_mapping{$_} : "$_.tvtoday.de");
	$_ = "C$_" if /^\d/;
	return $_;
    }
}



#-- grab_data($ch, $offset, $lday) :: grab the tvdata of one channel for one specific day
sub grab_data($$$) {
    my $ch = shift @_;      #- station id of the channel to grab (without the .tvtoday.de suffix)
    my $offset = shift @_;  #- offset we should use
    my $lday = shift @_;    #- true: last day to grab in row

    #- we got to send ztag=8 to retrieve data for yesterday
    $offset = 8 if($offset < 0);

    my $grab = {
	'channel'   => channel_id($ch),
	'url'       => "http://programm.tvtoday.de/tv/programm/programm.php?ztag=$offset&sparte=alle&uhrzeit=Ax00&sender=$ch",
	'lasttime'  => 0,
	'lastday'   => $lday,
    };

    while (defined($grab->{url})) {
	my $tb = HTML::TreeBuilder->new();
        my $htmldata = get_page($grab->{url});

	die "successful grab of $grab->{url} required, stopping here."
	    unless(defined($htmldata));
	
	$tb->parse($htmldata) or die "cannot parse content of $grab->{url}\n";
	$tb->eof;
	parse_page($tb, $grab);
	$tb->delete();
    }
}

sub convert_cp1252_chars($) {
  my $str = shift;

  $$str =~ s/\204/"/g; 		# Double Low-9 Quotation Mark
  $$str =~ s/\205/.../g; 	# Horizontal Ellipsis
  $$str =~ s/\221/`/g; 		# Left Single Quotation Mark
  $$str =~ s/\222/'/g; 		# Right Single Quotation Mark
  $$str =~ s/\223/"/g; 		# Left Double Quotation Mark
  $$str =~ s/\224/"/g; 		# Right Double Quotation Mark
  $$str =~ s/\225/*/g; 		# Bullet
  $$str =~ s/\226/-/g; 		# En Dash
  $$str =~ s/\227/-/g; 		# Em Dash

  return $str;
}

sub parse_page($$) {
    my $page = shift(@_);
    #$page->dump(); exit 0;
    my $grab = shift @_;
    my $over_headline_table = 0;
    my $pos; 
    my $day;
    
    if(($_ = $page->look_down('_tag' => 'span', 'class' => 'text'))) {
    	if($_->as_text() =~ m/aber wir konnten keine Sendungen mit dieser/) {
     	    warn "no information available for channel ", $grab->{channel};
            undef($grab->{url});
	    return;
	}
    }
	
    #-- extract date of grabbed data from retrieved webpage ...
    $_ = $page->look_down('_tag' => 'span', 'class' => 'text-weiss');
    die("cannot find date on requested page") 
      unless($_->as_text() =~ m/([1-3]?[0-9])\.(1?[0-9])\.(20[0-9]{2})/);
    t "extracted date: $3-$2-$1";
    $day = ParseDate("$3-$2-$1 00:00:00");

    #-- well, now let's scan the table for programme data
    foreach ($page->look_down('_tag' => 'table')) {
	my (%show, $begintime, $stoptime, $popup);

	next if(not defined($_->attr('cellpadding')));
	
	if ($_->attr("cellpadding") eq "2") {
	    last unless($_->as_text() =~ m/weitere Sendungen/);
            #-- don't request another page, if day's over
	    last if($grab->{lasttime} >= 86400); 

	    #-- we have even more shows available, scan that page as well ...
	    my $link = $_->extract_links('a');
	    $grab->{url} = "http://programm.tvtoday.de" . 
	    	$link->[scalar(@$link)-1]->[0];
	    return;
	}

	### skip table, not containing data for us ...
	#print STDERR "cellpadding: ", $_->attr('cellpadding'), "\n";
	#$_->dump();
	next if($_->attr('cellpadding') ne "4");
	#print STDERR "width: ", $_->attr('width'), "\n";
	next if(not defined($_->attr('width'))
		or $_->attr('width') ne "585");
	#print STDERR "got through ...\n";
	
	#-- okay, parse this table now (each table is one show)
	my @el = $_->content_list();
	die unless(ref($el[0]) eq "HTML::Element" and $el[0]->tag eq "tr");

	@el = $el[0]->content_list();

	$_ = shift @el; #-- in this column there's the logo of the tv station
	$icons{$grab->{'channel'}} = $_->look_down('_tag' => 'img')->attr('src')
	  unless(exists($icons{$grab->{'channel'}}));

	$_ = shift @el; #-- there we should have the time when our show begins ...
	die "unable to extract time-information from html code, content:\n", $_->as_text()
	  unless($_->as_text() =~ m/([0-2][0-9])\.([0-5][0-9])/);
	$begintime = $1 * 3600 + $2 * 60;
	$begintime += 86400 if($grab->{'lasttime'} >= 86400); 

	#print STDERR "start: $begintime\n";
	my $start = parse_local_date(DateCalc($day, "+ $begintime seconds"), $TZ);
	my ($start_base, $start_tz) = @{date_to_local($start, $TZ)};
	$show{"start"} = UnixDate($start_base, '%q') . " $start_tz";

	#warn("DEBUG: show out of cronological order, beginning: $1.$2!") 
	#  if($debug && $begintime < $grab->{'lasttime'});
	next if ($begintime < $grab->{'lasttime'});
	
	$_ = shift @el; #-- here we should have: popup url(if any), name of show, end time, showview number
	my @td=$_->content_list();
	
	die unless(ref($td[0]) eq "HTML::Element" and $td[0]->tag eq "span" and $td[0]->attr("class") eq "headline");
	my $span = ($td[0]->content_list())[0];

	if (ref($span) eq "") {
	    convert_cp1252_chars(\$span);
	    $span =~ s/\s*\([^\(]+\)\s*$//;
	    if ($span =~ s/\s*(\d+)\.\sTeil//gi) {
		#- strip episode number from title field
		$show{q(episode-num)} = [ [ $1, "onscreen" ] ];
	    }

	    t "show title: $span";
	    $show{title} = [[ $span, $lang ]];
	} 
	elsif (ref($span) eq "HTML::Element" and $span->tag eq "a") {
	    $popup = "http://programm.tvtoday.de" . $span->attr('href');

	    my $tag = (($span->content_list())[0]->content_list())[0];
	    die unless(ref($tag) eq "HTML::Element" and $tag->tag eq "u");
	    
	    my $title = ($tag->content_list())[0];

	    convert_cp1252_chars(\$title);

	    $title =~ s/\s*\([^\(]+\)\s*$//;
	    if ($title =~ s/\s*(\d+)\.\sTeil//gi) {
		#- strip episode number from title field
		$show{q(episode-num)} = [ [ $1, "onscreen" ] ];
	    }
	    
	    $show{title} = [[ $title, $lang ]];
	} 
	else { die }

	die unless(ref($td[1]) eq "HTML::Element" and $td[1]->tag eq "span" and $td[1]->attr("class") eq "text");
	
	# we must not die if tvtoday.de doesn't serve stop-time-info and a
	# showview number, it doesn't happen often, but it unfortunately
	# happens (without a reason, I think)
	#
	#die unless($td[1]->as_text() =~ m/Min\. bis ([12]?[0-9])\.([0-5][0-9])\s+ShowView ([0-9\-]+)/);
	if($td[1]->as_text() =~ m/Min\. bis ([012][0-9])\.([0-5][0-9])(?:\s+Showview ([0-9\-]+))?/) {
	    $stoptime = $1 * 3600 + $2 * 60;
	    $stoptime += 86400 if($stoptime < $begintime);
	    #$show{stop} = UnixDate(DateCalc($day, "+ $stoptime seconds"), '%q');
	    my $stop = parse_local_date(DateCalc($day, "+ $stoptime seconds"), $TZ);
	    my ($stop_base, $stop_tz) = @{date_to_local($stop, $TZ)};
	    $show{"stop"} = UnixDate($stop_base, '%q') . " $stop_tz";
	    if (defined($3)) {
		#-- sometimes tvtoday.de serves invalid showview information
	    	$show{showview} = $3 unless($3 eq "99-999-999"); 
	    }
		
	}

	$grab->{"lasttime"} = defined($stoptime) ? $stoptime : ($begintime + 1);

	die unless(ref($td[3]) eq "HTML::Element" and $td[3]->tag eq "span" and $td[1]->attr("class") eq "text");

	my $desc = ($td[3]->content_list())[0];
	$desc =~ s/\021([a-z]\s+)?//gi; #- work around tvtoday.de database bug
	convert_cp1252_chars(\$desc);

	unless($opt_nosqueeze) {
	    squeeze_out_desc(\$desc, \%show);
	    convert_cp1252_chars(\$desc);
	}
	
	$desc =~ s/(^\s+|\s+$)//g;
	$show{desc} = [[ $desc, $lang ]] if(length($desc));

	read_popup($popup, \%show) if(defined($popup) && $opt_slow);
	refine_credits(\%show) if((defined($popup) && $opt_slow) || not $opt_nosqueeze);

	#-- okay, commit that data now ...
	$show{channel} = $grab->{channel};

        #-- try to construct clumps, if necessary ...
        if(defined($show{q(desc)}) 
           && $show{q(desc)}->[0][0] =~ m/^anschl\.\s+(.*)/) {
            my $clumpname = $1;

            delete $show{q(desc)};
            $show{q(clumpidx)} = '0/2'; # first of two shows ...
            push @writebuffer, \%show; 

            my %newshow;
            foreach(qw(start stop channel)) { $newshow{$_} = $show{$_}; }
            $newshow{q(clumpidx)} = '1/2'; # second show ...

            #- $clumpname may contain a extra VPS start time ...
            if($clumpname =~ s/\s+\(VPS ([012]?[0-9])\.([0-6][0-9])\)//) {
                $newshow{q(vps-start)} = $newshow{q(start)};
                substr($newshow{"vps-start"}, 8, 4) = sprintf("%02d%02d", $1, $2);
            }
            
            warn("title of clumped show contains problematic chars, please take care")
              if($clumpname =~ m/[,;:\*]/);
              
            $newshow{q(title)} = [[ $clumpname, $lang ]]; 
            push @writebuffer, \%newshow;
        }
        else {
            #-- common clumpless show, write out ...
            push @writebuffer, \%show; 
        }

	last if($grab->{"lasttime"} >= 86400 && !$grab->{"lastday"});
    }

    undef($grab->{url});
    return;
}



#-- read_popup($url, %$show) -- read the popup file and add the retrieved data into the %show hash
my $warned_discarding_two_channel = 0;
sub read_popup($$) {
    my $tb = HTML::TreeBuilder->new();
    my $url = shift;
    my $show = shift;
    my $htmldata = get_page($url);

    return unless(defined($htmldata));
    $tb->parse($htmldata) or die "cannot parse content of $url\n";
    $tb->eof;
    
    #-- scan the "data" column at the left first ...
    my $col = $tb->look_down('_tag' => 'table', 'width' => '170');
    #die "cannot find left column in retrieved popup data:\n$got\n" unless ($col);
    #
    # we mustn't assume that this table is actually there, there was at
    # least one popup yet, that didn't provide it (okay, don't know what
    # an empty popup window is good for, but who knows ...)
    $tb->delete(), return unless($col);

    foreach ($col->content_list()) {
	warn "something else but tr-tag found below table-tag, he?", next 
	  unless(ref($_) eq "HTML::Element" and $_->tag eq "tr");
	
	my @td;
	@td = ($_->content_list());
	
	warn "below <tr> there should be a <td>, no here :(", next 
	  unless(ref($td[0]) eq "HTML::Element" and $td[0]->tag eq "td");
	@td = ($td[0]->content_list());

	warn "content found below <td>, tag expected, ignoring", next 
	  unless(ref($td[0]) eq "HTML::Element");
	next unless ($td[0]->tag eq "span");

	#-- okay, we've got a span!
	warn "first span's not of headline-class", next 
	  unless($td[0]->attr('class') eq 'headline');

	#FIX: td[1] doesn't have to be <br>, it may also be just some whitespace !!
	#warn "expected <br> as td[1], not found, ignoring this span", next unless($td[1]->tag eq "br");
	warn "td[2] should be a span of text-class, couldn't be found here, sorry.", next unless($td[2]->tag eq "span" and $td[2]->attr('class') eq 'text');

	my $headline = ($td[0]->content_list())[0];
	my $content = ($td[2]->content_list())[0];
	$content =~ s/(^\s|\s$)//g;
	
	if ($headline =~ m/ShowView:/) {
	    die unless($content =~ m/(?:ShowView )?([0-9\-]+)/);
	    $show->{"showview"} = $1 unless($1 eq "99-999-999");
	} 
	elsif ($headline =~ m/Genre:/) {
	    $show->{"category"} = [[ $content, $lang ]];
	} 
	elsif ($headline =~ m/Regie:/) {
	    my @tmp = split m/\s*,\s*/, $content;
	    add_credits($show, 'director', @tmp);
	} 
	elsif ($headline =~ m/Darsteller:/) {
	    my @tmp = split m/\s*,\s*/, $content;
	    add_credits($show, 'actor', @tmp);
	} 
	elsif ($headline =~ m/FSK:/) {
	    die unless($content =~ m/ab ([0-9]+)/);
	    $show->{rating} = [ [ $1, 'FSK' ] ]
	} 
	else {
	    warn "haven't heard of headline $headline yet, adding to description";
	    my $add = "$headline: $content";
	    if ($show->{desc}) {
		$show->{desc}->[0]->[0] .= " $add";
	    } 
	    else {
		$show->{desc} = [ [ $add, $lang ] ];
	    }
	}
    }

    #-- well, now let's have a look for the main column
    $col = $tb->look_down('_tag' => 'td', 'width' => '270');
    die "cannot find main column in retrieved popup data" unless ($col);
    foreach ($col->content_list()) {
	next unless(ref($_) eq "HTML::Element");
	next unless($_->tag eq "span");
	die unless($_->attr('class') eq "text");

	(my $add = ($_->content_list())[0]) =~ s/\x00+//g;
	$add =~ s/\s+$//;
	convert_cp1252_chars(\$add);
	if ($show->{desc}) {
	    $show->{desc}->[0]->[0] .= " * $add";
	} 
	else {
	    $show->{desc} = [ [ $add, $lang ] ];
	}
	last;
    }

    #-- write feature defaults
    $show->{"video"} = { present => 1, colour => 1 };
    $show->{"audio"} = { present => 1, stereo => "mono" };


    #-- last but not least: care for flags that might be available
    foreach ($tb->look_down('_tag' => 'span', 'class' => 'text-mini')) {
	$_ = ($_->content_list())[0];
	if (m/Untertitel fr Hrgeschdigte/) {
	    $show->{"subtitles"} = [{ type => 'teletext' }];
	} 
	elsif (m/schwarzwei/) {
	    $show->{"video"}->{"colour"} = 0;
	} 
	elsif (m/Stereoton/) {
	    $show->{"audio"}->{"stereo"} = "stereo";
	} 
	elsif (m/Zweikanalton/) {
	    warn "discarding two-channel sound flag"
		unless $warned_discarding_two_channel++;
	    #-- show is broadcast in two languages, but we don't
	    #-- know in which ones ... how to store that?
	} 
	elsif (m/Dolby Surround/) {
            $show->{"audio"}->{"stereo"} = "surround";
	}
	else {
	    warn "unknown show feature: $_";
	}
    }


    #-- okay, refine category attribute
    my @newdesc;
    refine_category_attr(\@newdesc, $show);
    if (scalar(@newdesc) > 0) { 
	my $haddesc = ($show->{"desc"} ? $show->{"desc"}->[0][0] . " * " : "");
	my $newdesc = join " * ", (grep $_, @newdesc);

	unless(index($haddesc, $newdesc) > -1) { 
	    $show->{"desc"} = [[ "$haddesc$newdesc", $lang ]];
	}
    }


    #-- okay, we're done, delete what we don't need and return ...
    $tb->delete();
}


#-- squeeze_out_desc($$desc, %$show)
sub squeeze_out_desc($$) {
    my $desc = shift;
    my $show = shift;
    my @newdesc;


    # try to match <category>, <country> <year>; R: <names>; D: <names> construct
    # where <country>/<year> or the [RD]: stuff may be missing ...
    if(my @parts = ($$desc =~ m/^\s*(\(([^\)]*)\))?\s+([^,;0-9]+)(,?\s+([^,;]+)\s+([12][09][0-9]{2}(?:[\/-][0-9]{2})?))?\s*; (?:(?:; )?(Buch\/Regie|R): ([^;]+))?\s*((?:; )?D: (.+))?\s*$/)) {
        t "split rule: <category>, <country> <year> ...";
	$$desc = "";

	#-- $parts[1] is the show title in English (doesn't have to be available)
	#-- second title doesn't have to be Enlish, if you've got a French
	#-- movie, $parts[1] will be in French! => undef
	$show->{"title"}->[1] = [ $parts[1], undef ] if($parts[1]);

	#-- $parts[2] is the show's genre in German
	$show->{"category"} = [[ $parts[2], $lang ]];
	warn "misdetected category: $parts[2]" 
	  if($parts[2] =~ m/\d{4}/);
		      
	if ($parts[3]) {
	    #-- $parts[4] specifies where the film was made, [5] when
	    $show->{"country"} = [[ $parts[4], $lang ]];
	    $show->{"date"} = parse_date_data($parts[5]);
	}

	#-- $parts[7] specifies the director (German: Regisseur)
	if (defined($parts[7])) {
	    $parts[7] =~ s/\s*u.a.\s*$//;
	    $parts[7] =~ s/\([^\(\)]+\)//g; 
	    $parts[7] =~ s/&amp;/&/g;

	    my @people = split m/(?:\s+und\s+|\s*[,;]\s*)/, $parts[7];
	    my @jobs;
	    if ($parts[6] eq 'R') {
		@jobs = qw(director);
	    }
	    elsif ($parts[6] eq 'Buch/Regie') {
		@jobs = qw(director writer);
	    }
	    else {
		warn "don't understand 'director' type $parts[6]";
	    }
	    
	    push @{$show->{credits}{$_}}, @people
	      foreach @jobs;
	}

	if (defined($parts[9])) {
	    #-- $parts[9] specifies the actors (German: Darsteller)
	    $parts[9] =~ s/\s*u.a.\s*$//;
	    $parts[9] =~ s/\([^\(\)]+\)//g; 
	    $parts[9] =~ s/&amp;/&/g;

	    my @actor = split m/(?:\s+und\s+|\s*[,;]\s*)/, $parts[9];
	    push @{$show->{"credits"}{"actor"}}, @actor;
	}
    } 
    else {
        t "split rule: dot splitting";
	my @data = split "", $$desc;
	s/(^\s|\s$)//g foreach(@data); #CHG#

	for(0 .. (scalar(@data) - 1)) {
	    t "dot-split part $_: " . $data[$_];
	}

	if(scalar(@data) == 3 
	   && not($data[1] =~ m/[\w]+:/) #- FIX false positive: tvtoday.de seems to publish "guests: <names>" here some (rare) times :-(  
	   && $data[2] =~ m/^Mit (.*?)$/) {
	    my $actors = $1; #- BUGFIX, cache $1 as $actors

	    # $data[0] --> sub title of show
	    # $data[1] --> genre, may be "<genre>, <country> <year" 
	    # $data[2] --> actors

	    #for ($data[0]) { s/^\s+//; s/\s+$// }
	    s/(^\s|\s$)//g foreach(@data); 
	    $show->{"sub-title"} = [[ $data[0], $lang ]];

	    if($data[1] =~ m/([^,;]+)(?:,\s+([^,;]*)\s+([12][09][0-9]{2}(?:[\/-][0-9]{2})?))?/) {
		my ($cat, $country, $date) = ($1, $2, $3);
		if ($cat =~ /\S/) {
	            warn "misdetected category: $cat" 
		      if($cat =~ m/\d{4}/);
		    $show->{"category"} = [[ $cat, $lang ]];
		}
		else { warn "bad category '$cat'" }

		if (defined $country) {
		    if ($country =~ /\S/) {
			    $show->{"country"} = [[ $country, $lang ]];
		    }
		    else { warn "bad country '$country'" }
		}

		$show->{"date"} = parse_date_data($date) if(defined($date));
	    } 
	    else {
		warn "<genre>(, <country> <year>)? expection not met, THIS SHOULD NOT HAPPEN";
		#-- try to get out here ...
		push @newdesc, $data[1];
	    }

	    my @actors = split(",", $actors); #- BUGFIX: relied on $1, which get's destroyed by insertions above
	    push @{$show->{"credits"}{"actor"}}, @actors;
	} 
	else {
	    foreach (@data) {
		if (m/^Thema: (.*)$/) {
		    push @newdesc, $_, next if($show->{"sub-title"});
		    $show->{"sub-title"} = [[ $1, $lang ]];
		    next;
		}
		    
		if (m/^\(Live\s*(.*)?\)$/) {
		    #-- it's a live show, location: $1 (if defined)
		    push @newdesc, $_; #- xmltv.dtd doesn't support it (yet) -- copy to desc field
		    #undef $_;
		    next;
		}

		if (my ($nocat, $cat, $rest1, $names, $guests, $rest2) = m/^(([^,.%^&*();]+?)((?:\s+-\s+..+?)*)|.+) - Moderation: (.+?) - Gste: (..+?)(?:\s+-\s+(.+))?$/) {
		    my @data = split_up_names($names, $show);
		    push @{$show->{"credits"}{"presenter"}}, @data;
		    my @guest_data = split_up_names($guests, $show);
		    push @{$show->{"credits"}{"guest"}}, @guest_data;

		    if(defined($cat)) {
			$show->{"category"} = [[ $cat, $lang ]];
			
			warn "misdetected category: $cat" 
			  if($cat =~ m/\d{4}/);
		    }
		    else {
		        t "no-cat match: $nocat";
			$rest1 = $nocat;
		    }
		      
		    my @rest; 
		    foreach(defined($rest1) ? split(m/\s+-\s+/, $rest1) : undef, $rest2) {
			push @rest, $_ if(defined($_) && length($_));
		    }
		    $_ = join " * ", @rest;
		    next unless length($_);
		}

		if (my ($nocat, $cat, $rest1, $names, $rest2) = m/^(([^,]+?)((?:\s+-\s+..+?)*)|.+) - Moderation: (.+?)(?:\s+-\s+(.+))?$/) {
		    my @data = split_up_names($names, $show);
		    push @{$show->{"credits"}{"presenter"}}, @data;

		    if(defined($cat)) {
			$show->{"category"} = [[ $cat, $lang ]];

			warn "misdetected category: $cat" 
			  if($cat =~ m/\d{4}/);
		    }
		    else {
		        t "no-cat match: $nocat";
			$rest1 = $nocat;
		    }
			
		    my @rest; 
		    foreach(defined($rest1) ? split(m/\s+-\s+/, $rest1) : undef, $rest2) {
			push @rest, $_ if(defined($_) && length($_));
		    }
		    $_ = join " * ", @rest;
		    next unless length($_);
		}

		if (my ($type, $names) = m/^\s*(Reporter:|Moderation:|Kommentar:|Gast:|Gste:|Mit|Film von)\s+(?!de[nm]\s+)(.*?)\s*$/) {
		    $names =~ s/\s*u.a.\s*$//;
		    $names =~ s/\([^\(\)]+\)//g; #-- remove all brackets, that further describe the person
		    $names =~ s/&amp;/&/g; #- the semicolon behind &amp; causes trouble, replace it -- other entities shouldn't appear ...

		    #-- try to split up ...
		    my @data = split_up_names($names, $show);

		    if(scalar(@data) > 1 || scalar($data[0] =~ m/\s/g)) {
			#-- if there's only one word, we seem to be wrong ...
			#-- ignore and go on without squeezing out too much info
			if($type eq "Reporter:" || $type eq "Moderation:") {
			    push @{$show->{"credits"}{"presenter"}}, @data;
			} 
			elsif($type eq "Kommentar:") {
			    push @{$show->{"credits"}{"commentator"}}, @data;
			} 
			elsif($type eq "Gast:" || $type eq "Gste:" || $type eq "Mit") {
			    if($type eq "Mit" && $names =~ m/(?:Rundschau|Sport|Wetter|Nachrichten|Wirtschaft)/ || $names =~ m/^".*"$/) {
				push @newdesc, $_;
				next;
			    }
				
			    push @{$show->{"credits"}{"guest"}}, @data;
			} 
			elsif($type eq "Film von") {
			    push @{$show->{"credits"}{"producer"}}, @data;
			} 
			else { die }

			undef $_;
			next;
		    }
		}

		if (m/^\s*u.a.\s*/) {
		    # "u.a." means and others, this is a left over thing,
		    # e.g. if you've got a comedy series with comedian1,
		    # comedian2 + separator + 'u.a.' -> simply ignore
		    undef $_;
		    next;
		}

		if (m/^\s*([^,;!-%\(\)=\+]+), ([^,;!-%\(\)=\+]+) ([12][90][0-9]{2}(?:[\/-][0-9]{2})?)\s*$/) {
		    $show->{"category"} = [[ $1, $lang ]];
		    $show->{"country"} = [[ $2, $lang ]];
		    $show->{"date"} = parse_date_data($3);

		    $_ = $1; warn "misdetected category: $_" if(m/\d{4}/);

		    undef $_;
		    next;
		}

		if (my ($category, $subtitle) = m/^(.*?) - (.*?)( - ((Free-TV|Deutschland)-Premiere(; )?)?([0-9]+\/(I{1,3}V?|V|VI{1,3}))?)?$/) {
		    if ($category =~ m/$category_regexp/o 
		        and not $subtitle =~ m/ (am|um) [0-9\.\:]+/
		        and not $subtitle =~ m/ - /) {
			$show->{"category"} = [[ $category, $lang ]];
			$show->{"sub-title"} = [[ $subtitle, $lang ]];

			warn "misdetected category: $category" 
			  if($category =~ m/\d{4}/);

			next;
		    }
		}

		#-- don't know what it means, ....
		push @newdesc, $_;
	    }
	}
    }

    unless(defined($show->{"category"})) {
    foreach(@newdesc) {
	next unless(my ($leftpart, $category, $rightpart) = m/$category_regexp/o);
	$leftpart = "" unless(defined($leftpart));
	$rightpart = "" unless(defined($rightpart));
	
	warn "misdetected category: $category" 
	  if($category =~ m/\d{4}/);

	warn("already had category for ".$show->{"title"}->[0][0]." available (".$show->{"category"}->[0][0]."), replacing by '$category', this should not happen") if($show->{"category"});
	warn "misdetected category: $category" if($category =~ m/\d{4}/);
	$show->{"category"} = [[ $category, $lang ]];

	$leftpart =~ s/(^\s|\s$)//g;
	$rightpart =~ s/(^\s|\s$)//g;
	
	if(not length("$leftpart$rightpart")) {
	    #- $#newdesc --;
	    undef $_;
	} 
	elsif($rightpart =~ m/^mit\s+(.*)$/) { #-- mit means "with" in German
	    my @data = split_up_names($1, $show);
	    add_credits($show, 'presenter', @data);
	    
	    undef $_ unless(length($leftpart));
	} 
	elsif($rightpart =~ m/^von(?: und mit)?\s+(.+)$/) { #-- mit means "with" in German
	    my @data = split_up_names($1, $show);
	    add_credits($show, 'producer', @data);
	    
	    undef $_ unless(length($leftpart));
	} 

	last;
    }
    }

    if(defined($show->{"category"})) {
        foreach (@{$show->{"category"}}) {
            die if not defined($_->[0]);
	    warn "we assigned some strange category: $_->[0]"
	      if($_->[0] =~ m/\d{4}/);
	}     
    }

    refine_category_attr(\@newdesc, $show);
    $$desc = join " * ", (grep $_, @newdesc);
}

#-- add_credits(%$show, $credit, @people) -- add names to a <credits>
# subelement but only if they are not already there.
#
# Assumption: nothing is removing from credits lists.
#
sub add_credits($$@) {
    my ($show, $credit, @people) = @_;
    my %seen;
    foreach (@{$show->{credits}->{$credit}}) { $seen{$_}++ }
    push @{$show->{credits}->{$credit}},
      grep { not $seen{$_}++ } @people;
}

#-- refine_category_attr(@$desc, %$show) -- refine category attribute of %show
sub refine_category_attr($$) {
    my $desc = shift;
    my $show = shift;

    #-- refining category attributes ...
    return if not exists $show->{"category"};
    foreach (@{$show->{"category"}}) {
	die if not defined($_->[0]);
	$_->[0] =~ s/(^\s|\s$)//g;

	if($_->[0] =~ s/\s+(?:frei\s+)?(nach|von|mit)\s+([^\d]*?)\s*$//) {
	    my @data = split(m/(?:\sund\s|[,;])/, $2);

	    if ($1 eq "nach") { push @{$show->{"credits"}{"writer"}}, @data; }
	    elsif ($1 eq "von") { push @{$show->{"credits"}{"producer"}}, @data; }
	    elsif ($1 eq "mit") { push @{$show->{"credits"}{"presenter"}}, @data; }
	    else { die }
	}

	if($_->[0] =~ s/\s*(\d+\. Staffel)\s*//) {
	    #-- really doesn't belong into category, throw out to description
	    push @$desc, $1;
	}

	#-- okay, the last word should be the actual category now,
	#   discard everything else back to description
	if($_->[0] =~ s/^\s*(.*?\s+)(?=[\w\-\/]+\s*$)//) {
	    push @$desc, "$1$_->[0]";
	}

	#-- discard whole entry if we don't have any text left ...
	$_->[0] =~ s/(^\s|\s$)//g;
	undef $_ unless(length($_->[0]));
    }

    # We may have set some category elements to undef, but they
    # shouldn't be left there.
    #
    $show->{"category"} = [ grep { $_ } @{$show->{"category"}} ];


    # DEBUG FEATURE
    # check the stored categories against our list ...
    1 && return;

    foreach (@{$show->{"category"}}) {
	my $title = $show->{"title"}->[0][0];
	
	warn "show '$title' has invalid category-language tag assigned: $_->[1]"
	  unless($_->[1] eq $lang);

	next unless(m/$category_regexp/o);
	warn "show '$title' has strange category assigned: $_->[0]" 
	  if(length("$1$3"));
    }
}



#-- refine_credits(%$show) -- refine credits listed below %show hash
sub refine_credits($) {
    my $show = shift;
    
    foreach(keys(%{$show->{"credits"}})) {
	foreach(@{$show->{"credits"}{$_}}) {
	    s/(^\s|\s$)//g;

	    #-- remove leading articles in front of group's names, e.g. bands etc.
	    s/^de[rnm]\s+//g; 

	    #-- trim leading proffession-names, etc ...
	    #-- this is stupidly given with all these faked judgement shows (for juges and lawyers who probably even don't have and deserve their title ...)
	    s/^(Anwalt|Anwltin|Anwlten|Richter(in)?)\s+//g;
	}
    }
}



#-- get channel logos
sub get_icons() {
    my %icons;
    my $url="http://programm.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=";
    my $chan;
    my $tag;
    my $addr;

    my $bar = new XMLTV::ProgressBar('grabbing icons', scalar(keys(%channels)))
      if not $opt_quiet;

    foreach (keys %channels) {
        my $tb = new HTML::TreeBuilder();
	my $htmldata = get_page($url.$_);
	next unless(defined($htmldata));
        $tb->parse($htmldata) or die "cannot parse content of $url$_\n";
	$tb->eof;

        $tag = $tb->look_down('_tag' => 'img',
        sub {
            return ($_[0]->attr('src') =~ m/^\/tv\/programm\/bilder\/senderlogos\//);
        });

        update $bar if not $opt_quiet;

        unless(ref($tag) eq "HTML::Element") {
                $tb->delete;
                next;
        };

        $icons{$_} = $tag->attr('src');
        $tb->delete;
    }
    $bar->finish() if not $opt_quiet;
    
    return %icons;
}



#-- get channel listing
sub get_channels() {
    my %channels;
    my $url="http://programm.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=alle";

    my $tb=new HTML::TreeBuilder();
    my $htmldata = get_page($url);

    die "successful grab of $url required, stopping here."
	unless(defined($htmldata));

    $tb->parse($htmldata) or die "cannot parse content of $url\n";
    $tb->eof;
    foreach ($tb->look_down('_tag' => 'select', 'name' => 'sender')->content_list()) {
	next unless(ref($_) eq "HTML::Element");
	warn "unexpected HTML::Element $_->tag", next if($_->tag ne "option");

	my $station_name = $_->as_text();
	$station_name =~ s/(?:^\s|\s$)//g;

	next if ($station_name eq "_______");
	next if ($station_name eq "Regional");
	next if ($station_name eq "Haupt");
	next if ($station_name eq "alle");
	next if ($station_name eq "Haupt+Reg.");
	next if ($station_name eq "Sparten");
	next if ($station_name eq "Ausland");
	next if ($station_name eq "PREMIERE/");
	next if ($station_name eq "DIGITAL-TV");

	$channels{uri_escape($_->attr("value"))} = $station_name;
    }



    #--- check, whether we got an up to date version of the page ...
    #-- calculate expected date ...
    my $utc_now = Date_ConvTZ(ParseDate('now'), "", "UTC");
    my $now = ParseDate(UnixDate(@{date_to_local($utc_now, $TZ)}[0], "%q"));
    print STDERR "current date in CE(S)T is: $now\n" if($debug);
    my $expect = UnixDate($now, "%e.%f.%Y");
    $expect =~ s/ //g;

    $_ = $tb->look_down('_tag' => 'span', 'class' => 'text-weiss');
    die("cannot find date on requested page") 
      unless ($_->as_text() =~ m/([1-3]?[0-9]\.1?[0-9]\.20[0-9]{2})/);
    warn("probably using information from outdated cache,\ncurrent date according to tvtoday.de is $1, expected $expect.\n") 
      unless ($expect eq $1);

    $tb->delete;
    return %channels;
}


#-- split_up_names($names, %$show) :: Split up names into returned array
sub split_up_names($$) {
    my $names = shift;
    my $show = shift;

    $names =~ s/, unter Mitwirkung von\s+/ und /g; #- replace 'with help of' by simple and to allow match below
    my @data = split(m/\s*[,;]\s*/, $names);

    if(scalar(@data) == 2) {
	#-- check for "<name(s)>, <country> <year>" construct
	if($data[1] =~ m/\s*([^,;]*)\s+([12][09][0-9]{2}(?:[\/-][0-9]{2})?)\s*$/) {
	    $show->{"country"} = [[ $1, $lang ]];
	    $show->{"date"} = parse_date_data($2);

	    $#data --;
	    $names = $data[0];
		
	} 
	elsif(scalar($data[1] =~ m/\s/g) == 0) {
	    #- we most probably have a country specification
	    #- here, treat it as such, ... and hope it's right
	    $show->{"country"} = [[ $data[1], $lang ]];

	    $#data --;
	    $names = $data[0];
	}
    }

    if(scalar(@data) == 1 && $names =~ m/\s+und\s+/) {
	# looks like it didn't work, try splitting by 'und' (== and)
	@data = split(m/\s+und\s+/, $names);

	#-- check that we didn't have a "hername + hisname familyname or name + name" construct ...
	if(scalar(@data) == 2) {
	    @data = $names #-- ignore split in that case
	      if (scalar($data[0] =~ m/\s/g) == 0 && scalar($data[1] =~ m/\s/g) <= 1);
	}
    }

    return @data;
}


#-- parse_date_data($d) :: Parse the given "yyyy([-/]yy)?" date down to "yyyy" only
sub parse_date_data($) {
    my $date = shift;
    warn("bad date '$date' found, returning undef."), return(undef) 
      unless($date =~ m/((?:19|20)[0-9]{2})(?:(?:[-\/])([0-9]{2}))?/);

    #-- return if it's a plain 'yyyy' date ...
    return $date unless(defined($2));

    my $century = substr($date, 0, 2);
    if(substr($date, 2, 2) > $2) {
	warn("bad date '$date' found, returning undef."), return(undef) 
	  unless($century == 19);
	$century ++;
    } 

    return $century * 100 + $2;
}



#-- get_page($url) :: try to download $url via http://, look for closing </body> tag or die
sub get_page($) {
    my $url = shift;
    my $retry = 0;

    local $SIG{__DIE__} = sub { die "\n$url: $_[0]" };
    #print STDERR "get_page: $url\n";
    
    while($retry < 2) {
	my $got = eval { get_nice($url . ($retry ? "&retry=$retry" : "")); };
	$retry ++;

	next if($@); # unable to download, doesn't look too good for us.

	die "retrieved webpage doesn't look like a tvtoday.de page, maybe a proxy error?"
	  unless(index($got, "<title>TV TODAY</title>"));

	#-- page seems to be complete, if we have a </body> tag ...
	return $got unless(index($got, "</body>") < 0);

	#-- be nice to our server, let's wait extra ...
	sleep(rand($retry * 5)) unless($debug);
	warn "got incomplete webpage from tvtoday.de" if($debug);
    }

    warn "cannot grab webpage $url, giving up after trying $retry times";
    return undef;
}
