#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell
#
# tv_grab_na_icon
#
# This script goes to the zap2it web site and downloads icon links or images
# to the share/icons directory.
#
# (C)2001 - Robert Eden, free to use under the GNU License.
#
#  Robert Eden - reden@cpan.org
#  	
#     See cvs logs entries for module history
#

=pod

=head1 NAME

tv_grab_na_icons - Grab channel icon images or links from zap2it.com

=head1 SYNOPSIS

tv_grab_na_icons [--links] [--share dir]

=head1 DESCRIPTIONS

This script goes to the zap2it web site and downloads icon
links or images to the share/icons directory.

It was designed to allow icons to be added by tv_grab_na_dd,
but there is no reason it can't be used for other purposes.

To minimize the load on zap2it.com, downloaded icons are recommended.
Links are available for those applications that need it.

Also to reduce the load on zap2it.com, this script should be run
sparingly. There is really no need to run it frequently, new networks
or icons don't show up that often.  To emphasize that point, there is
no --configure mode, the questions need to be answered each run.

=head1 OPTIONS

=over

=item --links

Store imge URLs in *.url files instead of downloading images.

=item -share I<SHAREDIR>

Icons are stored in I<SHAREDIR>/icons.  The share directory is set at install time,
but there may be times when it needs to be specified. (for example: no write access to the default share
directory)

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=back

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Robert Eden

=cut 

use strict;
use open qw(:utf8);
use Getopt::Long;
use Data::Dumper;
use File::Basename;
use WWW::Mechanize 1.02;
use HTML::TreeBuilder;
use XML::Twig 3.28;

use XMLTV;
use XMLTV::Ask;
use XMLTV::Version '$Id: tv_grab_na_icons.in,v 1.11 2007/04/26 19:57:39 rmeden Exp $ ';
use XMLTV::Usage <<END
tv_grab_na_icons [--images]

END
;

select STDERR; $|=1;
select STDOUT; $|=1;

my $opt_help=0;
my $opt_debug=0;
my $opt_links=0;
my $SHARE_DIR='/usr/share/xmltv'; # by grab/na_icons/tv_grab_na_icons.PL
my $fileno=0;
my $file=undef;

GetOptions(
	   'help'          => \$opt_help,
	   'debug'         => \$opt_debug,
	   'links'         => \$opt_links,
	   'share=s'       => \$SHARE_DIR,
	  )
  or usage(0);

usage(1) if $opt_help;

die "ERROR:Share directory '$SHARE_DIR' not found\n" unless -d $SHARE_DIR;
mkdir "$SHARE_DIR/icons" unless -d "$SHARE_DIR/icons";
#
# create our agent
#
my $a = WWW::Mechanize->new( env_proxy => 1   );
$a->agent(sprintf("%s/$XMLTV::VERSION",basename($0)||'xmltv'));

print STDERR "Getting inital page\n" if $opt_debug;
$a->get('http://www.zap2it.com/index');
check_page($a);

#
# select zip
#
while (1)
{
    die "ERROR:Can't find zipcode form\n" unless find_form($a,"zipcode");

    my $zip=ask("\nPostal Code:");
    print STDERR "Submitting zip code $zip\n" if $opt_debug;
    $a->field("zipcode",$zip);
    $a->submit;
    check_page($a);
#
# bug in zap2it.com, zip not being picked up from first page, try next page
#
    if (grep(/Enter ZIP for local listings/,$a->content)) {
        die "ERROR:Can't find zipcode form\n" unless find_form($a,"zipcode");
        print STDERR "Submitting zip code $zip (again)\n" if $opt_debug;
        $a->field("zipcode",$zip);
        $a->submit;
    check_page($a);
   }
    last if grep(/No Provider Selected/,$a->content);
    last if grep(/Choose Your Provider/,$a->content);
    print "  Invalid Postal Code, try again\n";
}
# for some reason, we only get a little data without this header
$a->add_header( 'Accept-Language' => 'en-us,en;q=0.5');

#
# select lineup
#
{
    my %lineups=();
    my $def_lineup=undef;
    foreach $_ ($a->links) {
        next unless ($_->url =~ /lineupId=(.+)/ );
        my $name=$_->text;
           $name =~ s/\xa0//g; # drop bad characters

        print "Lineup $name\n" if $opt_debug;
        $lineups{$name}=\%$_;
        $def_lineup=$name unless defined $def_lineup;
    }
    
#print "Link: ", Dumper($_)  foreach $a->links();

    die "ERROR:Can't find provider links\n" unless defined $def_lineup;

    my $name=$def_lineup;
    $name=ask_choice("\nLineup?",$name,sort keys %lineups);

    printf STDERR "getting lineup %s\n",$lineups{$name}->url if $opt_debug;
#print Dumper($lineups{$name});
    $a->follow_link(url => $lineups{$name}->url);
    check_page($a);
} # select lineup
    
#
# select all channels
#
#    print "Display All Channels\n" if $opt_debug;
#    $a->follow_link(text_regex => qr/ALL CHANNELS/i) || warn "*WARNING* Can't find all channels link, hope we got them all!\n";
#    check_page($a);
#
#
# Convert HTML to XML
#
# we can use TWIG to do this because we need to use utf8_mode to avoid a warning
#
my $tree = HTML::TreeBuilder->new; # empty tree
   $tree->utf8_mode(1);
   $tree->parse($a->content);

my $xml = $tree->as_XML;

#
# Now parse the XML
#
my $image;
my %icons;
my $twig=XML::Twig->new(   
         twig_roots    => { html => 1},
		 twig_handlers => 
		      {
		         td  => sub {
                                  my $img_el;
                                  if (defined  $_->att('class')
                                            && $_->att('class') eq 'zc-station ') {
                                      my $name=$_ -> parent_trimmed_text;
                                      $name=~s/^([\.\d]+) //; # trim leading channel number
                                      
                                      if ( $img_el=$_ -> first_descendant('img')) {
                                          $icons{$name}=$img_el->att('src');
                                          print "Got image $name->$icons{$name}\n" if $opt_debug;
                                      }
                                      else {
                                          print "Got $name but no image\n" if $opt_debug;
                                      }
                                  }
                                  $_->twig->purge;
                                  return 0;
		         }
		      });
$twig->parse_html($xml);

#
# check for problems
#
unless (keys %icons)
{
        open  FILE,">na_icon_error.html" || die "ERROR:Can't open na_icon_error.html\n";
        print FILE $a->content;
        close FILE;
        die "ERROR:No icons were found.  Please check 'na_icon_error.html'\n";
}

#
# print results
#
my $base=$a->base;
foreach (sort keys %icons)
{
    $image=URI->new_abs($icons{$_},$base);
    if ($opt_links)
    {
        $file="$SHARE_DIR/icons/$_.url";
        open(FILE,">$file") || die "ERROR:Can't write to $file\n";
            print FILE $image."\n";
            close FILE;
        printf "Stored %10s in %20s\n",$_,$file;
    }
    else
    {
        my $type=(fileparse($image,'\..*'))[2];
        $file="$SHARE_DIR/icons/$_$type";
        printf "Getting %10s as %20s: %s\n",$_,$file,$a->mirror($image,$file)->message;
    }
}
    
exit;

#print Dumper($a);
#print "Link: ", Dumper($_)  foreach $a->links();
#print "Form: ", $_->dump  foreach $a->forms();
#print $a->current_form->dump;

#
# check status, write out html file
#
sub check_page {
    my $res=shift || die "ERROR:No Mechanize specified\n";
    $fileno++;
    if ($opt_debug)
    {
        $file="na_icon_${fileno}";
        open  FILE,">$file.html" || die "ERROR:Can't open $file.html\n";
        print FILE $res->content;
        close FILE;

        open  FILE,">$file.txt" || die "ERROR:Can't open $file.txt\n";
        print FILE Dumper($res);
        close FILE;
    }
    die "ERROR:page error ",$res->status_line unless $res->success;
} # check_page

#
# subroutine to search for form w/o knowing it's name
#
sub find_form
{
    my $mech=shift || die "ERROR:find_form: mechanize object not specified";
    my $name=shift || die "ERROR:find_form: field name not specified";
    my @forms=$mech->forms;
    my $fn=0;
    foreach (0..$#forms)
    {
        $fn=$_ if $forms[$_]->find_input($name);
    }
    $mech->form_number($fn+1) if $fn;
    return $fn;
} #find_form

