#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell
=pod

=head1 NAME

tv_grab_uk_rt - Grab TV listings for Britain and Ireland

=head1 SYNOPSIS

tv_grab_uk_rt --help

tv_grab_uk_rt [--config-file FILE] --configure [--gui OPTION]

tv_grab_uk_rt [--config-file FILE] [--output FILE] [--quiet]
              [--days N] [--offset N]
              [--list-channels]

=head1 DESCRIPTION

Output TV and radio listings in XMLTV format for many stations
available in the United Kingdom and Republic of Ireland.  The data
comes from a machine-readable file produced by the Radio Times
website.

=head1 USAGE

First you must run B<tv_grab_uk_rt --configure> to choose which stations you
want to receive.  Then running B<tv_grab_uk_rt> with no arguments will get
about a fortnightE<39>s listings for the stations you chose.

B<--configure> Prompt for which stations to download and write the
configuration file.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--config-file FILE> Set the name of the configuration file, the default is
B<~/.xmltv/tv_grab_uk_rt.conf>.  This is the file written by B<--configure> and
read when grabbing.

B<--output FILE> When grabbing, write output to FILE rather than standard
output.

B<--quiet> Suppress the progress messages normally written to standard error.

B<--days N> When grabbing, grab N days of data instead of all available.

B<--offset N> Start grabbing at today + N days.

B<--list-channels> List channels.

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

Note that tv_grab_uk_rt always downloads data for all days and then filters
out the days specified with --days and --offset. It is therefore more
efficient to omit --days and --offset and use all the returned data.

=head1 SEE ALSO

L<xmltv(5)>, L<http://www.radiotimes.beeb.com/>

=head1 AUTHOR

Ed Avis, ed@membled.com

=cut

use warnings;
use strict;
use XMLTV::Version '$Id: tv_grab_uk_rt.in,v 1.93 2006/11/30 19:36:30 mattiasholmlund Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache share preferredmethod/;
use XMLTV::Description 'United Kingdom/Ireland (Radiotimes)';
use XMLTV::PreferredMethod qw/allatonce/;
use Getopt::Long;
use HTML::Entities;
use Date::Manip; Date_Init('TZ=+0000');
use XMLTV::Config_file;
use XMLTV::Get_nice;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Memoize; XMLTV::Memoize::check_argv 'get_nice';
use XMLTV::DST;
use XMLTV::Usage <<END
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--quiet]
List channels only: $0 --list-channels [--output FILE] [--quiet]
END
  ;
$XMLTV::Get_nice::Delay = 0; # since this is intended for grabbing
my $channel_list_uri = 'http://xmltv.radiotimes.com/xmltv/channels.dat';

sub configure();

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
	*t = sub {};
	*d = sub { '' };
    }
    else {
	*t = \&Log::TraceMessages::t;
	*d = \&Log::TraceMessages::d;
	Log::TraceMessages::check_argv();
    }
}

GetOptions('help'          => \ my $opt_help,
       'configure'     => \ my $opt_configure,
       'config-file=s' => \ my $opt_config_file,
       'gui:s'         => \ my $opt_gui,
       'output=s'      => \ my $opt_output,
       'share=s'       => \ my $opt_share, # also undocumented
       'quiet'         => \ my $opt_quiet,
       'list-channels' => \ my $opt_list_channels,
       'days=s'        => \ my $opt_days,
       'offset=s'      => \ my $opt_offset,
      )
  or usage(0);

if ($opt_help) {
    usage(1);
}

# share/ directory for storing channel mapping files.  This next line
# is altered by processing through tv_grab_uk_rt.PL.  But we can use
# the current directory instead of share/tv_grab_uk for development.
#
# The 'source' file tv_grab_uk_rt.in has $SHARE_DIR undef, which means
# use the current directory.  In any case the directory can be
# overridden with the --share option (useful for testing).
#
my $SHARE_DIR='/usr/share/xmltv'; # by grab/uk_rt/tv_grab_uk_rt.PL
$SHARE_DIR = $opt_share if defined $opt_share;
my $OUR_SHARE_DIR = (defined $SHARE_DIR) ? "$SHARE_DIR/tv_grab_uk_rt" : '.';
(my $CHANNEL_NAMES_FILE = "$OUR_SHARE_DIR/channel_ids") =~ tr!/!/!s;

XMLTV::Ask::init($opt_gui);

# Stuff for the root <tv> element.
my %tv_credits = ( # 'source-info-url'     => "todo",
           'source-info-name'    => 'Radio Times',
           'generator-info-name' => 'XMLTV',
           'generator-info-url'  =>
           'http://membled.com/work/apps/xmltv/',
         );

# Tables to convert between Radio Times and XMLTV ids of channels.
# The way to access these is through the routines rt_to_xmltv() and
# xmltv_to_rt(), not directly.  Those will deal sensibly with a new RT
# channel that isn't mentioned in the file.
#
my (%rt_to_xmltv, %xmltv_to_rt, %extra_dn, %icon_urls);
my $line_num = 0;
foreach (XMLTV::Config_file::read_lines($CHANNEL_NAMES_FILE, 1)) {
    ++ $line_num;
    next unless defined;
    my $where = "$CHANNEL_NAMES_FILE:$line_num";
    my @fields = split /\|/;
    die "$where: wrong number of fields"
      if @fields < 2 or @fields > 4;

    my ($xmltv_id, $rt_id, $extra_dn, $icon_url) = @fields;
    warn "$where: RT id $rt_id seen already\n"
      if defined $rt_to_xmltv{$rt_id};
    $rt_to_xmltv{$rt_id} = $xmltv_id;
    warn "$where: XMLTV id $xmltv_id seen already\n"
      if defined $xmltv_to_rt{$xmltv_id};
    $xmltv_to_rt{$xmltv_id} = $rt_id;

    $extra_dn{$xmltv_id} = $extra_dn if defined $extra_dn;
    $icon_urls{$xmltv_id} = $icon_url if defined $icon_url;
}

say << 'END' unless $opt_quiet;
All data is the copyright of the Radio Times website
<http://www.radiotimes.com> and the use of this data is restricted to
personal use only.
END
;

# Whatever we're doing, we need the channel list.
my $bar = new XMLTV::ProgressBar({name => 'finding channels', count => 1})
  if not $opt_quiet;
my $channel_list = get_nice $channel_list_uri;
my (%channels, %seen_rt_id, %seen_name);
foreach (split /\n/, $channel_list) {
    chomp;
    /^(\d+)\|(.+)/ or die "bad line in channel list: $_";
    my ($rt_id, $name) = ($1, $2);
    $seen_rt_id{$rt_id}++ && die "channel with RT id $rt_id seen twice";
    $seen_name{$name}++ && die "channel named '$name' seen twice";
    my $xmltv_id = $rt_to_xmltv{$rt_id};
    if (not defined $xmltv_id) {
        if (not $opt_quiet) {
            warn "RT id $rt_id ($name) not known in channel ids file\n";
        }
	$xmltv_id = "C$rt_id.radiotimes.com";
    }
    my @names = ([ $name ]);
    
    my $icon_url = $icon_urls{$xmltv_id};
    my @icon = { 'src' => $icon_url } if $icon_url;
    for ($extra_dn{$xmltv_id}) { push @names, [ $_ ] if defined }
    if(@icon) {
        $channels{$xmltv_id} = { id => $xmltv_id,
			         rt_id => $rt_id,
			         'display-name' => \@names,
			         'icon' => \@icon };
    } else {
        $channels{$xmltv_id} = { id => $xmltv_id,
			         rt_id => $rt_id,
			         'display-name' => \@names };
    }
}
update $bar if $bar;
$bar->finish() if $bar;

if (not $opt_quiet) {
    foreach (keys %xmltv_to_rt) {
        warn "channel $_ ($xmltv_to_rt{$_}) not seen on site"
          if not exists $channels{$_};
    }
}

my %g_args = ();
if (defined $opt_output) {
    my $fh = new IO::File ">$opt_output";
    die "cannot write to $opt_output\n" if not $fh;
    #    binmode $fh or die "cannot set binmode for output: $!";
    %g_args = (OUTPUT => $fh);
}

if ($opt_list_channels) {
    die "--list-channels can't be given with --configure\n"
      if $opt_configure;
    my $writer = new XMLTV::Writer(%g_args, encoding => 'ISO-8859-1');
    $writer->start(\%tv_credits);
    foreach (sort keys %channels) {
	delete $channels{$_}{rt_id};
	$writer->write_channel($channels{$_});
    }
    $writer->end;
    exit;
}

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_uk_rt', $opt_quiet);

if ($opt_configure) {
    configure();
    exit;
}

# Ask the user which channels to download, and write $config_file.
#
# Uses global %channels hash.
#
# FIXME commonize with other grabbers.
#
sub configure() {
    #    local $Log::TraceMessages::On = 1;

    XMLTV::Config_file::check_no_overwrite($config_file);

    # FIXME need to make directory
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
    t 'channels: ' . d \%channels;

    my %chan_id_to_name;
    foreach my $chan_id (keys %channels) {
        $chan_id_to_name{$chan_id} =
            $channels{$chan_id}->{'display-name'}->[0]->[0];
    }

    my @chan_ids = sort {$chan_id_to_name{$a} cmp $chan_id_to_name{$b}}
        keys %chan_id_to_name;

    my @questions;
    foreach my $chan_id (@chan_ids) {
        push @questions, "Add channel ".$chan_id_to_name{$chan_id}."? ";
    }
    my @answers = ask_many_boolean(1, @questions);

    for (my $i=0; $i < $#chan_ids; $i++) {
        if ($answers[$i]) {
            print CONF "channel ".$chan_ids[$i]."\n";
        }
    }

    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
    exit();
}

# Grabbing.  Start by reading config file.
my @wanted_chs;
my $n = 0;
foreach (XMLTV::Config_file::read_lines $config_file) {
    ++$n;
    next if not defined;
    /^\s*channel\s+(\S+)\s*$/ or die "$config_file: $n: bad line $_\n";
    my $id = $1;
    if (not exists $channels{$id}) {
        warn "channel $id mentioned in $config_file but not on site\n";
        next;
    }
    push @wanted_chs, $id;
}
#@wanted_chs = sort keys %channels;

my %d_args = ();
if (defined( $opt_days ) or defined( $opt_offset )) {
  $opt_offset = 0 unless defined $opt_offset;
  $opt_days = 15 unless defined $opt_days;

  $d_args{offset} = $opt_offset;
  $d_args{days} = $opt_days;
  $d_args{cutoff} = "000000";
}

my $writer = new XMLTV::Writer(%g_args, %d_args, encoding => 'ISO-8859-1');
$writer->start(\%tv_credits);
foreach (@wanted_chs) {
    my %h = %{$channels{$_}};
    delete $h{rt_id};
    $writer->write_channel(\%h);
}

$bar = new XMLTV::ProgressBar({name => 'grabbing', count => scalar @wanted_chs})
  if not $opt_quiet;
my %warned_wrong_num_fields; # give that warning once per channel file
foreach my $ch (@wanted_chs) {
    my $c = $channels{$ch};
    my $rt_id = $channels{$ch}->{rt_id}; die if not defined $rt_id;

    # Try to get the base timezone for this channel from its name.
    my $base_tz;
    if ($c->{'display-name'}->[0]->[0] =~ /\((UTC|GMT|CET)\)\s*$/) {
	$base_tz = $1;
    }
    for ($base_tz) { $_ = 'UTC' if not defined }

    my $uri = "http://xmltv.radiotimes.com/xmltv/$rt_id.dat";
    local $SIG{__DIE__} = sub { die "$uri: $_[0]" };
    local $SIG{__WARN__} = sub { warn "$uri: $_[0]" };
    my $page = get_nice $uri;

    # Tidy up HTML entities and bad characters.  The site seems to use
    # a mixture of Latin-1 and UTF-8, I'm not sure exactly.  We want
    # our output to be in Latin-1 but HTML::Entities decides to use
    # Unicode so we have to fiddle a few entities manually first.
    #
    for ($page) {
	s/&#8212;/--/g;
	s/&#8230;/.../g;
	decode_entities $_;
	tr/\207\211\200\224/\347\311\055\055/; # bad characters
    }
    foreach (split /\n/, $page) {
	my @fields = split /\~/;
	if (@fields != 23) {
	    warn "wrong number of fields in line:\n$_\n"
	      unless $warned_wrong_num_fields{$ch}++;
	    next;
	}
	foreach (@fields) { s/^\s+//; s/\s+$//; undef $_ if not length }
	my ($title, $sub_title, $episode, $year, $director, $cast,
	    $premiere, $film, $repeat, $subtitles, $widescreen,
	    $new_series, $deaf_signed, $black_and_white, $star_rating,
	    $certificate, $genre, $desc, $choice, $date, $start, $stop,
	    $duration_mins) = @fields;
	foreach ($premiere, $film, $repeat, $subtitles, $widescreen,
		 $new_series, $deaf_signed, $black_and_white, $choice) {
	    die "true/false value not defined" if not defined;
	    if ($_ eq 'true') { $_ = 1 }
	    elsif ($_ eq 'false') { $_ = 0 }
	    else { die "bad true/false value $_" }
	}

	warn "ignoring sub-title $sub_title since episode also given\n"
	  if defined $sub_title and defined $episode;
	$sub_title = $episode if defined $episode;

	warn("missing title in: $_"), next if not defined $title;

	# Roundabout the summer time changeover they include timezone
	# in the title.
	#
	my $explicit_tz = '';
	if ($title =~ s/^\((GMT|UTC|BST|UTC\+1)\)\s*//) {
	    $explicit_tz = $1;
	}

	my %p = (channel => $ch, title => [ [ $title ] ]);
        if (defined $sub_title && 
            ($sub_title =~ /^(\d+)\/(\d+)$/ ||
             $sub_title =~ /^(\d+)\/(\d+)\s+-\s+/))
        {
            my $episode = $1 - 1;
            my $episodes = $2;

            $p{'episode-num'} = [ [ " . ${episode}/${episodes} . ", "xmltv_ns" ] ];

            $sub_title =~ s/^(\d+)\/(\d+)(?:\s+-\s+)?//;

            undef $sub_title if $sub_title =~ /^\s*$/;
        }
	for ($sub_title) { $p{'sub-title'} = [ [ $_ ] ] if defined }
	for ($year) { $p{date} = $_ if defined }
	for ($director) { $p{credits}{director} = [ $_ ] if defined }
	if (defined $cast) {
	    my @cast;
	    if ($cast =~ tr/|//) {
		@cast = split /\|/, $cast;
		# Each bit is in the format 'part*actor' and it seems that
		# even when part is 'director' that is the name of a
		# character.
		#
		foreach (@cast) {
		    s/^.*[*]// or warn "bad bit of cast list: $_";
		}
	    }
	    else {
		@cast = split /,/, $cast;
	    }
	    foreach (@cast) { s/^\s+//; s/\s+$// }
	    $p{credits}{actor} = \@cast;
	}
	$p{premiere} = [ '' ] if $premiere;
	push @{$p{category}}, [ 'film', 'en' ] if $film;
	$p{'previously-shown'} = {} if $repeat;
	$p{subtitles} = [ { type => 'teletext' } ] if $subtitles;
	$p{video}{aspect} = '16:9' if $widescreen;
	$p{new} = 1 if $new_series;
	# $deaf_signed ignored for now
	$p{video}{colour} = 0 if $black_and_white;
	$p{'star-rating'} = [ "$star_rating/5" ] if defined $star_rating;
	$p{rating} = [ [ $certificate, 'BBFC' ] ] if defined $certificate;
	push @{$p{category}}, [ $genre, 'en' ] if defined $genre;
	for ($desc) {
	    if (defined) {
		s!</?[A-Za-z]+>!!g;
		$p{desc} = [ [ $_, 'en' ] ];
	    }
	}
	# $choice ignored for now

	# Date, start and stop time.
	my ($yyyy, $mm, $dd);
	for ($date) {
	    die "missing date in $_" if not defined;
	    m!(\d\d)/(\d\d)/(\d{4})$! or die "bad date $_";
	    ($dd, $mm, $yyyy) = ($1, $2, $3);
	}

	$p{start} = utc_offset "$yyyy$mm$dd$start $explicit_tz", $base_tz;
	$p{stop} = utc_offset "$yyyy$mm$dd$stop $explicit_tz", $base_tz;
	if (Date_Cmp($p{start}, $p{stop}) > 0) {
	    $p{stop} = utc_offset(DateCalc("$yyyy$mm$dd$stop $explicit_tz",
					   '+ 1 day'),
				  $base_tz);
	}
	# Ignore $duration_mins since it may not be reliable.

	$writer->write_programme(\%p);
    }
    update $bar if $bar;
}
$bar->finish() if $bar;
$writer->end;
