#!/usr/bin/perl -w
use strict; $|++; use HTML::TableExtract; use Getopt::Long; use LWP;
###############################################################################
# Generates a tabular list of all known World of Warcraft quests, checking    #
# off those you've completed. Also hides quests that aren't completable by    #
# your side, and separates everything by zone. Uses data from Allakhazam.     #
#                                                                             #
#     v1.3: 2007-07-21, morbus@disobey.com, email me if you use/modify.       #
###############################################################################
# To run this script, you'll need Perl and HTML::TableExtract. You'll also    #
# need an external file of completed quests, using the quest IDs from the     #
# wow.allakhazam.com site, one per line. Run the script as follows:           #
#                                                                             #
#       perl wowquests.pl qids.txt > wowquests.html                           #
#       perl wowquests.pl --side Alliance qids.txt > wowquests.html           #
#                                                                             #
# You can also filter by minimum and maximum levels - pass the command line   #
# args --minlevel 25 and --maxlevel 35 to get a list keyed to said range.     #
#                                                                             #
# The default side is "Horde". For the Horde! (Ha, ha! I'm cheesy! Wheee!)    #
###############################################################################
# changes (2007-07-21, version 1.3):                                          #
#   - min/max level filtering (thanks Edward Barton).                         #
#                                                                             #
# changes (2007-02-02, version 1.2):                                          #
#   - removed "Unknown" categories/zones.                                     #
#   - checks reported quest IDs vs. active quest IDs.                         #
#   - 30 second wait, then re-request, if zone URL returns no data.           #
#   - removed "Updated" images that showed up next to quest titles.           #
#                                                                             #
# changes (2006-02-09, version 1.1):                                          #
#   - we now keep track of quests available per zone and total.               #
#   - added initial support for Allakhazam's "Special Category" quests.       #
#   - added support for some of the unlisted "Unknown" categories/zones.      #
#   - print current zone to STDERR so we have a progress report.              #
#                                                                             #
# changes (2005-11-14, version 1.0):                                          #
#   - initial public release.                                                 #
###############################################################################

# which side is this character, and which quest levels do we care about?
# defaults are Horde and levels 1 through 99; command line flags override.
my %options = ( 'minlevel' => 1, 'maxlevel' => 99, 'side' => 'Horde' );
my $usage   = "Usage: $0 [--side SIDE] [--minlevel N] [--maxlevel N] qids.txt\n";
GetOptions(\%options, 'minlevel=i', 'maxlevel=i', 'side=s') or die $usage;

# load in the quest file full of qids per newline. the first remaining
# argument is the file name; without one there is nothing to check off.
my $quests_file = shift @ARGV;
die $usage unless defined $quests_file;

# three-arg open with a lexical handle: the file name is never parsed for
# a mode or pipe character, and the handle cannot collide with a bareword.
open(my $quests_fh, '<', $quests_file)
  or die "There was a quests file error: $!";
my %completed; # quest ID => number of times it appeared in the file.
while (my $line = <$quests_fh>) {
  $line =~ s/^\s+|\s+$//g;  # drops the newline, any CR, and stray padding.
  next unless length $line; # a blank line is not a quest ID.
  $completed{$line}++;
}
close($quests_fh);
my %found_completed; # completed quest IDs found on Alla (for later comparison).

# the HTTP client, and the header set passed along with every request
# (a Firefox 1.5 User-Agent string).
my $browser = LWP::UserAgent->new();
my %headers = (
  'User-Agent' => 'Mozilla/5.0 Gecko/20051107 Firefox/1.5',
);

# site root (prefixed onto the relative links we scrape) and the two
# index pages that list quest zones and "Special Category" groups.
my $root_url     = 'http://wow.allakhazam.com';
my $zones_url    = 'http://wow.allakhazam.com/db/qzone.html?x';
my $specials_url = 'http://wow.allakhazam.com/db/qspecial.html?x';

# a quest is listed when its "Side" cell matches the chosen side,
# "BothSides", or a literal question mark.
my $side_regexp = qr/($options{side}|BothSides|\?)/;

# level bounds from the command line (or the defaults).
my ($minlevel, $maxlevel) = @options{ 'minlevel', 'maxlevel' };

my @zone_urls; # will hold all the zones discovered, as { name, url } hashes.

# fetch the zone index. if this request fails there is nothing to report
# on, so stop loudly instead of silently producing an empty quest list.
my $zones_response = $browser->get($zones_url, %headers);
die "Could not fetch zone list $zones_url: " . $zones_response->status_line . "\n"
  unless $zones_response->is_success;
my $zones_content = $zones_response->content;
while ($zones_content =~ m{(/db/qlookup\.html\?zone=\d+)">(.*?)</a>}g) {
  my ($path, $name) = ($1, $2); # copy captures before anything can clobber them.
  next if $name eq 'Darkmoon Faire'; # use "Special Category" version.
  push @zone_urls, { name => $name, url => "$root_url$path" };
}

# now we need to load in the "Special Category" quests. these use a
# "special=" id (possibly negative) in place of a "zone=" id.
my $specials_response = $browser->get($specials_url, %headers);
die "Could not fetch special category list $specials_url: "
  . $specials_response->status_line . "\n"
  unless $specials_response->is_success;
my $specials_content = $specials_response->content;
while ($specials_content =~ m{(/db/qlookup\.html\?special=-?\d+)">(.*?)</a>}g) {
  my ($path, $name) = ($1, $2);
  push @zone_urls, { name => $name, url => "$root_url$path" };
}

# both pages loaded but neither yielded a link: the markup we scrape has
# probably changed, and carrying on would only print an empty report.
die "No quest zones found at $zones_url or $specials_url.\n" unless @zone_urls;

# sort the zones from our different groups alphabetically (case-insensitive).
@zone_urls = sort { lc $a->{'name'} cmp lc $b->{'name'} } @zone_urls;

# per-zone counters and their running totals:
#   x = quests skipped because they belong to the other side,
#   o = quests open to our side,
#   c = quests open to our side that appear in the completed file.
my $x_zone = 0; my $x_total = 0; my $c_zone = 0; my $c_total = 0;
my $o_zone = 0; my $o_total = 0;

# the column headers that identify the quest table on a zone page; cells
# come back in this order. keep_html leaves markup (links) in the cells.
my @te_headers = ("Name", "Starts in", "Side", "Level", "Start Npc");
my %te_config  = (keep_html => 1, strip_html_on_match => 0);

# we've got all our zones.
print header(); # HTML header.
foreach my $zone (@zone_urls) {
  my $zone_content = $browser->get($zone->{'url'}, %headers);
  until ($zone_content->is_success) { # keep trying until success, eh?
    # NOTE: deliberately unbounded; a zone URL that never succeeds will
    # keep this loop retrying every 30 seconds.
    print STDERR "Retrying $zone->{name} in 30 seconds.\n";
    sleep(30); $zone_content = $browser->get($zone->{'url'}, %headers);
  }

  my $te = HTML::TableExtract->new(headers => \@te_headers, %te_config);
  $te->parse($zone_content->content);
  print STDERR "Finding quests in $zone->{name}.\n";
  foreach my $ts ($te->table_states) {
    print "\n<h2><a href=\"$zone->{url}\">$zone->{name}</a></h2>\n";
    print "<table><tr><th></th><th>ID</th><th>Name</th><th>Starts in</th>".
          "<th>Level</th><th>Start NPC</th><th>Side</th></tr>\n";
    foreach my $r ($ts->rows) {
      my ($name, $starts_in, $side, $level, $npc) = @$r;
      $name      = '' unless defined $name;
      $starts_in = '' unless defined $starts_in;
      $level     = '' unless defined $level;

      # list-context match: $q is undef when the row carries no quest
      # link, rather than inheriting $1 from some earlier match.
      my ($q) = $name =~ /wquest=(\d+)/;
      next unless $q;

      $name =~ s/(\/db.*)/$root_url$1/g; # make the quest link absolute.
      $name =~ s/<img (.*?)>//gi;        # no <img>.
      $side = '?' unless $side;
      $npc  = '?' unless $npc;           # no NPC? Whoops.

      if ($side !~ /$side_regexp/) { $x_zone++; next; }
      $o_zone++; # we can do this quest, so count it, eh?
      my $s = '';
      if ($completed{$q}) { $c_zone++; $found_completed{$q}++; $s = '*'; }

      # level filter. the cell may still carry markup, so strip tags and
      # take the first integer; a cell with no number counts as level 0.
      # the bounds are inclusive: --minlevel 25 keeps level 25 quests.
      (my $level_plain = $level) =~ s/<[^>]*>//g;
      my ($level_num) = $level_plain =~ /(\d+)/;
      $level_num = 0 unless defined $level_num;
      next if $level_num < $minlevel or $level_num > $maxlevel;

      print "<tr><td class=\"c\">$s</td><td class=\"c\">$q</td>".
        "<td>$name</td><td>$starts_in</td><td class=\"c\" width=\"35\">$level</td>".
        "<td>$npc</td><td class=\"c\" width=\"8\">$side</td></tr>\n";
    }
    print "</table>"; # all done this zone, so print out some stats.
    print "<p>Unlisted quests not matching our side: $x_zone. ";
    print "Listed quests matching our side: $o_zone. ";
    print "Quests completed for this zone: $c_zone.</p>\n";

    # fold this zone into the totals and reset for the next one.
    $x_total += $x_zone; $c_total += $c_zone; $o_total += $o_zone;
    $x_zone = $c_zone = $o_zone = 0;
    last; # we only care about quests STARTING here.
  }
}

# cross-check: every quest ID the player listed (%completed) should have
# turned up while scraping (%found_completed). anything that did not is
# reported on STDERR, in string-sorted order.
print STDERR "Looking for quest IDs not in active data (probably error in data).\n";
my @unmatched_ids = grep { !$found_completed{$_} } sort keys %completed;
print STDERR "Quest ID $_ not found in active data.\n" for @unmatched_ids;

# grand totals across every zone, then close the document.
print "\n<p>Total unlisted quests not matching our side: $x_total. "
    . "Total listed quests matching our side: $o_total. "
    . "Total quests completed: $c_total.</p>";
print footer();

################################################################
# HTML headers and footers. nothing too exciting here.         #
################################################################
# Returns the opening HTML of the report: doctype, <head> with the inline
# stylesheet and the row-striping script, the <h1> title and the intro
# paragraph. Interpolates the chosen side (from %options) and the current
# local time as the "last generated" stamp. Takes no arguments.
sub header {
  my $generated_on = scalar localtime;
  my $side         = $options{side};
  return <<"END_OF_REPORT_HEADER";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head><title>World of Warcraft Quest Tracker</title>
 <style type="text/css"><!--
  body { font-size:11px;margin:1em;font-family:arial,sans-serif; }
  a { text-decoration:none; } tr.even td { background-color:#edf3fe; }
  th { border:1px solid rgb(196,196,196);background-color:rgb(248,248,248); }
  table { border:1px;margin-left:5px;margin-right:5px;padding:2px;width:98% }
  tr, td { border:1px solid rgb(235,235,235);border-top:0px; }
  p { margin-left:5px;margin-top:0; } .c { text-align:center; }
  h1, h2, h3 { background-color:transparent;color:#001080; }
 //--></style>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <script type="text/javascript"><!--
  onload = function() { stripe(); };
  function stripe() {
   var tables = document.getElementsByTagName("table");
    for (var i = 0; i < tables.length; i += 1) {
     var trs = tables[i].getElementsByTagName("tr");
     for (var j = 0; j < trs.length; j += 2) {
      trs[j].className += " even";
     }
    }
  }
 // --></script>
</head><body><h1>World of Warcraft Quest Tracker ($side)</h1>
<p style="font-size:1.4em;">The below contains a listing for all known World
of Warcraft quests available to the $side, and was last generated
$generated_on. It was <a href="http://www.disobey.com/d/code/wowquests.pl">created
by a Perl script from Morbus Iff</a> that collects quest and zone data from the
<a href="http://wow.allakhazam.com/">Allakhazam World of Warcraft site</a>. Quests
with asterisks next to them have been completed for this particular character.</p>
END_OF_REPORT_HEADER
}

# Returns the closing </body> and </html> tags that end the report.
sub footer {
  my $closing_tags = '</body></html>';
  return $closing_tags;
}