00001 #!/usr/bin/perl -w
00002 #
00003 # Scan channels for missing artwork, and download if found.
00004 #
00005 # Also does searches for artwork URLs based on callsign or xmltvid, or displays
00006 # the master iconmap xml file from services.mythtv.org.
00007 #
00008 # @url $URL: http://svn.mythtv.org/svn/branches/release-0-21-fixes/mythtv/contrib/icons/master_iconmap/channel_icons.pl $
00009 # @date $Date: 2007-03-23 01:38:00 -0400 (Fri, 23 Mar 2007) $
00010 # @version $Revision: 13114 $
00011 # @author $Author: xris $
00012 # @copyright MythTV
00013 # @license GPL
00014 #
00015
# Settings shared by every HTTP request this script makes:
#   $UserAgent - value sent in the User-Agent header
#   $data_url  - base URL of the channel icon web service
our ($UserAgent, $data_url) = (
    'MythTV Channel Icon lookup bot',
    'http://services.mythtv.org/channel-icon/',
);
00021
00022 # Includes
00023 use HTTP::Request;
00024 use LWP::UserAgent;
00025 use Fcntl qw(F_SETFD);
00026 use Getopt::Long;
00027 use MythTV;
00028
# Autoflush STDOUT so that prompts show up before we block reading STDIN
    $| = 1;

# Load cli arguments.  --usage/--help is handled directly by the print_usage
# callback, so it needs no variable of its own.
    my ($find_missing, $rescan,  $icon_dir,
        $callsign,     $xmltvid, $iconmap);
    GetOptions(
             # Find missing icons
               'find-missing' => \$find_missing,
               'rescan'       => \$rescan,
               'icon-dir=s'   => \$icon_dir,
             # Lookup
               'callsign=s'   => \$callsign,
               'xmltvid=s'    => \$xmltvid,
               'iconmap'      => \$iconmap,
             # Misc
               'usage|help'   => \&print_usage,
              )
        # GetOptions has already complained about the bad option on STDERR;
        # show the usage text instead of carrying on with a half-parsed
        # command line.
        or print_usage();

# Directory exists/writable/etc.?  The icon directory is only written to by
# --find-missing, so the lookup modes must not fail because it is absent.
    if ($find_missing) {
        unless ($icon_dir) {
            if ($ENV{'HOME'}) {
                $icon_dir = "$ENV{'HOME'}/.mythtv/channels";
            }
            elsif ($ENV{'USER'}) {
                $icon_dir = "/home/$ENV{'USER'}/.mythtv/channels";
            }
            else {
                die "Unable to determine home directory:  blank HOME and USER environment vars.\n";
            }
        }
        die "Icon directory $icon_dir is not a directory.\n" unless (-d $icon_dir);
        die "Icon directory $icon_dir is not writable.\n"    unless (-w $icon_dir);
    }
00063
# Ping the server.  A healthy server answers with a line made up of digits.
    my $data = wget("$data_url/ping");
    if (!$data || $data !~ /^\d+$/m) {
        print "Unknown ping response from services.mythtv.org:\n\n",
              (defined $data ? $data : ''), "\n\n",
              "Please try again later.\n";
        exit;
    }

# Callsign lookup?
    elsif ($callsign) {
        _print_lookup('callsign', $callsign);
    }

# xmltvid lookup?
    elsif ($xmltvid) {
        _print_lookup('xmltvid', $xmltvid);
    }

# Print the master iconmap?
    elsif ($iconmap) {
        my $map = wget('http://services.mythtv.org/channel-icon/master-iconmap');
        if (defined $map) {
            print $map;
        }
        else {
            print STDERR "Unable to download the master iconmap.\n";
        }
    }

# Find missing channel links
    elsif ($find_missing) {
    # Connect to mythbackend
        my $Myth = MythTV->new();
    # Load MythTV's channels
        $Myth->load_channels();
    # First, scan through the channels to build a lookup list (this lets us do
    # a bulk lookup, instead of blasting the webserver with a bunch of
    # individual queries).
        my $chan_csv    = '';
        my $searchcount = 0;
        foreach my $chanid (keys %{$Myth->{'channels'}}) {
            my $channel = $Myth->{'channels'}{$chanid};
        # Skip channels we already have icons for?
            next if ($channel->{'icon'} && !$rescan);
        # Add this line to the csv
            $searchcount++;
            $chan_csv .= _channel_csv_line($chanid, $channel);
        }
    # Nothing to look up means there is nothing to ask the server about
        unless ($searchcount) {
            print "None of your channels are missing icons.\n";
            exit;
        }
        my $data = wget("$data_url/findmissing", '', {'csv' => $chan_csv});
    # A complete response ends with a "#" marker, which is stripped here
        unless (defined $data && $data =~ s/#\s*$//) {
            print "No data was returned from your channel search.\n",
                  "Please try again later.\n";
            exit;
        }
    # Parse out all of our channel info
        my %matches;
        foreach my $line (split /\n/, $data) {
            next unless ($line =~ /\S/);
            my ($chanid, $type, $id, $name, $url) = extract_csv($line);
            next unless (defined $chanid && defined $type);
            $matches{$chanid}{$type} = {'id'   => $id,
                                        'name' => $name,
                                        'url'  => $url};
        }
    # Several lines can describe the same channel, so count channels rather
    # than lines for the "N of M channels" report.
        my $count = scalar keys %matches;
    # Notify the user about what's going to happen.  The box is 78 columns
    # wide: a border, a two-space margin and 72 columns of text on each row.
        my $rule = '+' . ('-' x 76) . '+';
        print "\n$rule\n";
        foreach my $text ('',
                          "MythTV.org returned info about $count of $searchcount channels.",
                          '',
                          'You will now be prompted to choose the correct icon URL for each of your',
                          'channels. You will also have the opportunity to transmit your choices',
                          'back to mythtv.org so that others can benefit from your selections.',
                          '') {
            printf("|  %-72s  |\n", $text);
        }
        print "$rule\n\n";
    # Prepare a db query
        my $sh = $Myth->{'dbh'}->prepare('UPDATE channel
                                             SET icon = ?
                                           WHERE chanid = ?');
    # Parse our known channels to find matches, and keep track of the
    # results so they can be submitted back to the webserver.
        my $match_csv = '';
        $count = 0;
        foreach my $chanid (sort { $Myth->{'channels'}{$a}{'name'} cmp $Myth->{'channels'}{$b}{'name'} } keys %{$Myth->{'channels'}}) {
            my $channel = $Myth->{'channels'}{$chanid};
        # Skip invisible channels
            next unless ($channel->{'visible'});
        # Skip channels we already have icons for?
            next if ($channel->{'icon'} && !$rescan);
        # Load the fuzzy matches
            my $fuzzy = $matches{$chanid} || {};
        # Perfect xmltv match?  Use it without asking.
            if ($fuzzy->{'xmltvid'}) {
                $count++ if (_install_icon($fuzzy->{'xmltvid'}{'url'},
                                           $icon_dir, $sh, $channel->{'chanid'}));
                next;
            }
        # Interactively search through the icons
            my ($icon, $icon_csv);
            while (1) {
                $icon = search_icons($channel->{'name'},
                                     "Found unrecognized channel:  #$channel->{'channum'} / $channel->{'callsign'} / $channel->{'name'}",
                                     $fuzzy);
                print "\n";
                last unless ($icon);
            # Build a csv string for this icon.  This is assigned, not
            # appended, so that a pick rejected on an earlier pass of this
            # loop is neither re-checked nor submitted.
                $icon_csv = _channel_csv_line($icon->{'id'}, $channel);
            # Make sure that the requested choice isn't currently blocked
                my $blocks = is_blocked($icon_csv);
                if ($blocks) {
                    print "This combination of channel and icon has been blocked by the MythTV admins.\n",
                          "The most common reason for this is that there is a better match available.\n\n",
                          "Blocked:  $blocks\n\n";
                # Accept input
                    print 'Are you still sure that you want to use this icon?  ';
                    my $choice = <STDIN>;
                    print "\n";
                # End of input: behave as if the user had chosen to exit
                    unless (defined $choice) {
                        $icon = undef;
                        last;
                    }
                    $icon = undef unless ($choice =~ /^\s*[yt1]/i)
                }
                last if ($icon);
            }
        # Exit the program?
            last unless (defined $icon);
        # Skipped icon?
            next unless ($icon);
        # Keep track of this match so we can submit it to the server later.
            $match_csv .= $icon_csv;
        # Download the icon; only successful imports are counted
            $count++ if (_install_icon($icon->{'url'},
                                       $icon_dir, $sh, $channel->{'chanid'}));
        }
    # Submit the found channels.
        if ($match_csv) {
            print "Submit channel information to mythtv.org? ";
            my $choice = <STDIN>;
            if (defined $choice && $choice =~ /^\s*y/i) {
                my $data = wget("$data_url/submit", '', {'csv' => $match_csv});
                if (defined $data && $data =~ s/\s+#\s*$//) {
                    if ($data =~ /^t:(\d+)$/m) {
                        print "Submitted $1 channels, mapping icons to:\n\n";
                        print " xmltvid: $1\n"  if ($data =~ /^x:(\d+)$/m && $1 > 0);
                        print " callsign: $1\n" if ($data =~ /^c:(\d+)$/m && $1 > 0);
                        print " atsc: $1\n"     if ($data =~ /^a:(\d+)$/m && $1 > 0);
                        print " dvb: $1\n"      if ($data =~ /^d:(\d+)$/m && $1 > 0);
                        print "\nThank you.\n";
                    }
                    else {
                        print "No channel icons were accepted by the server.\n",
                              "Thank you for trying, though.\n";
                    }
                }
                else {
                # A failed submission does not undo the local import, so fall
                # through to the report and cleanup below instead of exiting.
                    print "No data was returned from your channel submission.\n",
                          "Thank you for trying, though.\n";
                }
            }
        }
    # Report back to the user
        print "Imported $count channel icons into your MythTV database.\n";
    # Cleanup
        $sh->finish;
    }

# Otherwise, just print the usage message
    else {
        print_usage();
    }

# Done
    exit;

# Look up one station by the given field and print the resulting icon URL.
# Error text from the lookup goes to STDERR so STDOUT only ever carries a URL.
    sub _print_lookup {
        my ($field, $value) = @_;
        my ($url, $err) = station_lookup($field, $value);
        if ($err) {
            print STDERR (defined $url ? $url : "Lookup failed for $field $value"), "\n";
        }
        elsif (defined $url && length $url) {
            print "$url\n";
        }
        else {
            print STDERR "No icon was found for $field $value\n";
        }
    }

# Build one csv line describing a channel.  $lead is the first field (a chanid
# when searching, an icon id when submitting); the rest come from the channel
# hash.  Whitespace runs in the name are squeezed to single spaces so that a
# stray newline cannot confuse the csv reader on the other end.
    sub _channel_csv_line {
        my ($lead, $channel) = @_;
        my $name = defined $channel->{'name'} ? $channel->{'name'} : '';
        $name =~ s/\s+/ /g;
        return join(',', map { escape_csv($_) }
                             $lead,
                             $name,
                             @{$channel}{qw(xmltvid         callsign
                                            dtv_transportid atsc_major_chan
                                            atsc_minor_chan dtv_networkid
                                            serviceid)}
                   )."\n";
    }

# Download $url, store it in $icon_dir under the last path component of the
# URL, and point the channel's icon column at the new file via the prepared
# statement $sh.  Returns 1 on success; on failure prints the reason to STDERR
# and returns 0.
    sub _install_icon {
        my ($url, $icon_dir, $sh, $chanid) = @_;
        my $data = defined $url ? wget($url) : undef;
        unless ($data) {
            print STDERR "Error downloading icon:  ", (defined $url ? $url : ''), "\n";
            return 0;
        }
        my ($img) = $url =~ m{([^/]+)$};
        unless (defined $img) {
            print STDERR "Unable to work out a file name from icon URL:  $url\n";
            return 0;
        }
        my $path = "$icon_dir/$img";
        my $fh;
        unless (open($fh, '>', $path)) {
            print STDERR "Error writing icon file $path :  $!\n";
            return 0;
        }
    # Icons are image data; keep line-ending translation out of the way
        binmode($fh);
        print $fh $data;
    # Buffered write errors only show up when the handle is closed
        unless (close($fh)) {
            print STDERR "Error writing icon file $path :  $!\n";
            return 0;
        }
    # Update the DB
        $sh->execute($path, $chanid);
        return 1;
    }
00289
00290 ################################################################################
00291
# Check if a particular combination of icon and channel has been blocked.
#
# Takes the csv line(s) describing the icon/channel pairing and posts them to
# the "checkblock" service.  Returns the server's answer (end marker removed,
# first run of non-word characters turned into a comma) when it contains any
# word characters, or '' when nothing is blocked or the request failed.
    sub is_blocked {
        my $csv = shift;
    # Lexical on purpose: the caller keeps its own $data
        my $data = wget("$data_url/checkblock", '', {'csv' => $csv});
        return '' unless (defined $data && $data =~ /\w/);
    # A complete response ends with a "#" marker
        return '' unless ($data =~ s/\s+#\s*$//);
    # NOTE(review): only the first separator is replaced; confirm whether
    # every separator was meant to become a comma before adding /g.
        $data =~ s/\W+/,/;
        return $data;
    }
00302
# Ask the web service for the icon belonging to one station.
#
# Usage:
#   station_lookup('xmltvid' | 'callsign', value)
#   station_lookup('atsc', transportid, major_chan, minor_chan)
#   station_lookup('dvb',  transportid, networkid,  serviceid)
#
# Returns ($url) on success, or ($message, 1) when the server answered with an
# "ERROR: " message or did not answer at all.  Dies on an unknown field name.
    sub station_lookup {
        my $field = shift;
        my %params;
        if ($field =~ /xmltvid|callsign/i) {
            %params = ( $field => shift );
        }
        elsif ($field =~ /atsc/) {
            %params = ( 'transportid' => shift,
                        'major_chan'  => shift,
                        'minor_chan'  => shift,
                      );
        }
        elsif ($field =~ /dvb/) {
            %params = ( 'transportid' => shift,
                        'networkid'   => shift,
                        'serviceid'   => shift,
                      );
        }
        else {
            die "Unknown field $field passed to station_lookup()\n";
        }
    # Lookup, parse and return.  $data is lexical so the caller's own $data
    # is left alone.
        my $data = wget("$data_url/lookup", '', \%params);
        unless (defined $data) {
            return ('ERROR: no response from the lookup service', 1);
        }
        if ($data =~ /^ERROR:\s/) {
            return ($data, 1);
        }
    # The url is the second csv field of the response
        my (undef, $url) = extract_csv($data);
        return ($url);
    }
00335
# Search icons interactively for a specified string.
#
#   $string - default search term (the channel name)
#   $prompt - heading printed above every menu
#   $fuzzy  - optional hashref of recommended matches; each value is a hashref
#             with 'id', 'name' and 'url'
#
# Returns the chosen match hashref, 0 when the user skips the channel, or
# undef when the user asks to exit (or STDIN reaches end of file).
    sub search_icons {
        my $string = shift;
        my $prompt = shift;
        my $fuzzy  = shift;
    # Prompt about the fuzzy matches?
        if (ref $fuzzy eq 'HASH' && keys %{$fuzzy}) {
            print "$prompt\n Recommended matches:\n\n";
        # Print the list of possible matches, and keep track of which menu
        # number belongs to which key.  Equal names fall back to key order so
        # the numbering is stable.
            my %hash = ();
            my $i    = 0;
            foreach my $key (sort { ($fuzzy->{$a}{'name'} cmp $fuzzy->{$b}{'name'})
                                    || ($a cmp $b) } keys %{$fuzzy}) {
                $i++;
                $hash{$i} = $key;
                my $match = $fuzzy->{$key};
                print " $i) $match->{'name'}\n",
                      " $match->{'url'}\n\n";
            }
        # Along with the manual options
            print " M) More options\n",
                  " S) Skip This Channel\n",
                  " E) Exit\n\n";
        # Accept input
            while (1) {
                print 'Choice: ';
                my $choice = _read_trimmed_line();
            # End of input is treated as a request to exit
                return undef unless (defined $choice);
                $choice = lc($choice);
            # Skip?
                return 0 if ($choice eq 's');
            # Exit completely?
                return undef if ($choice eq 'e');
            # Selected an icon
                return $fuzzy->{$hash{$choice}} if ($hash{$choice});
            # Other options
                last if ($choice eq 'm');
            }
        }
    # Loop and search until we get a good response
        my $manual;
        while (1) {
            print "$prompt\n";
            if ($manual) {
                print " Manually searching for: $manual\n";
            }
            print "\n";
        # Search
            my %matches;
            my $count = 0;
            my $data = wget("$data_url/search", '', {'s' => $manual ? "%$manual" : $string});
        # A complete response ends with a "#" marker, which is stripped here
            if (defined $data && $data =~ /\w/ && $data =~ s/\s+#\s*$//) {
            # Parse out all of our channel info
                foreach my $line (split /\n/, $data) {
                    $count++;
                    my ($id, $name, $url) = extract_csv($line);
                    $matches{$count} = {'id'   => $id,
                                        'name' => $name,
                                        'url'  => $url};
                }
            }
        # Print the list of possible matches, and keep track
            my %hash = ();
            my $i    = 0;
            if ($count) {
                foreach my $key (sort { $matches{$a}{'name'} cmp $matches{$b}{'name'} } keys %matches) {
                    $i++;
                    $hash{$i} = $key;
                    my $match = $matches{$key};
                    print ' ' x (length($count) - length($i)),
                          "$i) $match->{'name'}\n",
                          ' ' x (length($count) + 2),
                          "$match->{'url'}\n\n";
                }
            }
            else {
                print "No matching icons were found.\n\n";
            }
        # Along with the manual options
            if ($manual) {
                print ' ' x (length($count) - 1),
                      "D) Default Search\n";
            }
            print ' ' x (length($count) - 1),
                  "M) Manual Search\n",
                  ' ' x (length($count) - 1),
                  "S) Skip This Channel\n",
                  ' ' x (length($count) - 1),
                  "E) Exit\n\n";
        # Accept input
            my $choice;
            while (1) {
                print 'Choice: ';
                $choice = _read_trimmed_line();
            # End of input is treated as a request to exit
                return undef unless (defined $choice);
                $choice = lc($choice);
            # Skip?
                return 0 if ($choice eq 's');
            # Exit completely?
                return undef if ($choice eq 'e');
            # Selected an icon
                return $matches{$hash{$choice}} if ($hash{$choice});
            # Other options
                last if ($choice =~ /^[dm]$/);
            }
        # Back to the default search
            if ($choice eq 'd') {
                undef $manual;
            }
        # Manual search?  Prompt for the new search term
            else {
                while (1) {
                    print 'Search for: ';
                    $manual = _read_trimmed_line();
                    return undef unless (defined $manual);
                    last if ($manual =~ /\w/);
                }
            }
        # Add a line break for clarification
            print "\n\n";
        }
    }

# Read one line from STDIN with surrounding whitespace removed.  Returns undef
# at end of file so that callers can stop prompting.
    sub _read_trimmed_line {
        my $line = <STDIN>;
        return undef unless (defined $line);
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        return $line;
    }
00459
# Retrieve a URL with LWP and return the response body.
#
# Usage:  wget(url, [referer], [params])
#
#   url     - address to request
#   referer - optional value for the Referer header
#   params  - optional hashref; when given, the pairs are url-encoded and sent
#             as the body of a POST, otherwise a plain GET is made
#
# Returns the response content, or nothing (undef in scalar context) when the
# request was not successful.
    {
    # One user agent is shared by every call so that keep_alive can actually
    # reuse a connection.  It is created on first use because this enclosing
    # block is never executed as a statement.
        my $ua;

        sub wget {
            my $url     = shift;
            my $referer = shift;
            my $params  = shift;
        # Set up the HTTP::Request object
            my $req;
            if ($params) {
            # Build the query; join() yields '' for an empty hash, so the
            # content and its length are always defined.
                my $query = join('&', map { urlencode($_).'='.urlencode($params->{$_}) }
                                          sort keys %{$params});
            # POST
                $req = HTTP::Request->new('POST', $url);
                $req->header('Content-Type'   => 'application/x-www-form-urlencoded',
                             'Content-Length' => length($query));
                $req->content($query);
            }
            else {
                $req = HTTP::Request->new('GET', $url);
            }
            $req->referer($referer) if ($referer);
        # Make the request
            unless ($ua) {
                $ua = LWP::UserAgent->new(keep_alive => 1);
                $ua->agent($UserAgent);
            }
            my $response = $ua->request($req);
        # Return the results
            return unless ($response->is_success);
            return $response->content;
        }
    }
00494
# URL-encode a string.
#
# Every character other than word characters, "*", "." and "-" is replaced by
# "%" followed by its two-digit lowercase hex code.  An undefined argument is
# treated as the empty string; the string "0" is kept as "0".
    sub urlencode {
        my $str = shift;
    # Test definedness rather than truth so that "0" is not thrown away
        $str = '' unless (defined $str);
        $str =~ s/([^\w*\.\-])/sprintf '%%%02x', ord $1/sge;
        return $str;
    }
00501
# Escape a string for safe insertion into a csv file.
#
# The value is wrapped in double quotes and every embedded double quote is
# doubled.  An undefined argument becomes an empty quoted field; the value 0
# is kept (it is a legitimate channel number).
    sub escape_csv {
        my $str = shift;
    # Test definedness rather than truth so that 0 is not thrown away
        $str = '' unless (defined $str);
        $str =~ s/"/""/g;
        return "\"$str\"";
    }
00508
# Extract csv fields from a line.
#
# Fields are separated by commas.  A field may be wrapped in double quotes, in
# which case it may contain commas, and quotes inside it are written as "" or
# \" .  Returns the list of field values with the surrounding quotes removed
# and escaped quotes restored; an empty quoted field yields an empty string.
    sub extract_csv {
        my $line = shift;
        $line = '' unless (defined $line);
        my @fields = ();
    # Replace trailing whitespace with a comma so that every field, including
    # the last one, is followed by a comma for the pattern below.
        $line =~ s/\s*$/,/;
        while ($line =~ m/(    (?!")[^,]*          # Normal field: does not start with a quote
                             | "(?:["\\]"|.)*?"    # Quoted field, which may hold escaped quotes
                          ),                       # Every field now ends with a comma
                         /mgx) {
            my $val = $1;
        # Remove and unescape quotes.  (.*) rather than (.+) so that an empty
        # quoted field comes back as '' instead of a literal pair of quotes.
            if ($val =~ /^"(.*)"$/s) {
                $val = $1;
                $val =~ s/["\\]"/"/sg;
            }
        # Add the field to the list
            push @fields, $val;
        }
    # Return
        return @fields;
    }
00537
# Print the usage message to STDOUT and exit the program.  Also installed as
# the Getopt::Long handler for --usage/--help, so any arguments are ignored.
    sub print_usage {
        print <<EOF;

usage: $0 [options]

options:

    --find-missing

        Scan your database and download any missing channel icons.
        See also: --rescan --icon-dir

    --rescan

        Use in combination with --find-missing to rescan all of your channels
        for new icons.

    --icon-dir {path}

        Directory in which to store downloaded channel icons. Used in
        combination with --find-missing. Defaults to ~/.mythtv/channels/

    --callsign {string}

        Display the icon URL for the specified callsign.

    --xmltvid {string}

        Display the icon URL for the specified xmltvid.

    --iconmap

        Display the master iconmap from:
            http://services.mythtv.org/channel-icon/master-iconmap

    --usage

        Display this message

EOF
        exit;
    }