#!/usr/bin/perl -w
use strict; $|++; use HTML::TableExtract; use Getopt::Long; use LWP;
###############################################################################
# Generates a tabular list of all known World of Warcraft quests, checking    #
# off those you've completed. Also hides quests that aren't completable by    #
# your side, and separates everything by category. Data from Allakhazam.      #
#                                                                             #
#     v1.5: 2008-10-28, morbus@disobey.com, email me if you use/modify.       #
###############################################################################
# To run this script, you'll need Perl and HTML::TableExtract. You'll also    #
# need an external file of completed quests, using the quest IDs from the     #
# wow.allakhazam.com site, one per line. Run the script as follows:           #
#                                                                             #
#       perl wowquests.pl qids.txt > wowquests.html                           #
#       perl wowquests.pl --side Alliance qids.txt > wowquests.html           #
#                                                                             #
# You can also filter by minimum and maximum levels - pass the command line   #
# args --minlevel 25 and --maxlevel 35 to get a list keyed to said range.     #
#                                                                             #
# The default side is "Horde". For the Horde! (Ha, ha! I'm cheesy! Wheee!)    #
###############################################################################
# changes (2008-10-28, version 1.5):                                          #
#   - added back in all the "broken" Seasonal entries.                        #
#   - Start NPC URLs are working again.                                       #
#                                                                             #
# changes (2008-10-22, version 1.4):                                          #
#   - updated to the latest version of Allakhazam's site.                     #
#   - fixed bug where level filters missed -1 leveled quests.                 #
#   - ignore a healthy dose of broken "Seasonal" database entries.            #
#                                                                             #
# changes (2007-07-21, version 1.3):                                          #
#   - min/max level filtering (thanks Edward Barton).                         #
#                                                                             #
# changes (2007-02-02, version 1.2):                                          #
#   - removed "Unknown" categories/zones.                                     #
#   - checks reported quest IDs vs. active quest IDs.                         #
#   - 30 second wait, then re-request, if zone URL returns no data.           #
#   - removed "Updated" images that showed up next to quest titles.           #
#                                                                             #
# changes (2006-02-09, version 1.1):                                          #
#   - we now keep track of quests available per zone and total.               #
#   - added initial support for Allakhazam's "Special Category" quests.       #
#   - added support for some of the unlisted "Unknown" categories/zones.      #
#   - print current zone to STDERR so we have a progress report.              #
#                                                                             #
# changes (2005-11-14, version 1.0):                                          #
#   - initial public release.                                                 #
###############################################################################

# which side is this character? --minlevel/--maxlevel narrow the listed level
# range; anything not given on the command line keeps the default below.
my %options = ( 'minlevel' => 1, 'maxlevel' => 999, 'side' => 'Horde' );
GetOptions(\%options, 'minlevel=i', 'maxlevel=i', 'side=s');

# load in the quest file full of qids per newline. GetOptions has already
# removed the flags from @ARGV, so the first thing left is the file path.
my $quests_file = shift @ARGV;
die "Usage: $0 [--side Horde|Alliance] [--minlevel N] [--maxlevel N] qids.txt\n"
  unless defined $quests_file;

# three-arg open with a lexical handle: the path is always treated as a plain
# filename, even if it happens to start with '>' or end with '|'.
open(my $quests_fh, '<', $quests_file)
  or die "There was a quests file error: $!";

my %completed; # quest ID => number of times it appeared in the file.
while (my $line = <$quests_fh>) {
  $line =~ s/^\s+|\s+$//g;  # drop the newline, any DOS "\r", and stray padding.
  next if $line eq '';      # a blank line is not a quest ID.
  $completed{$line}++;
}
close($quests_fh);

my %found_completed; # completed quest IDs found on Alla (for later comparison).

# fixed pieces of every request: where the site lives, which listing page we
# page through, and the browser-ish User-Agent we send along with each GET.
my $root_url     = 'http://wow.allakhazam.com';
my $quests_url   = 'http://wow.allakhazam.com/db/questlist.html';
my %headers      = ('User-Agent' => 'Mozilla/5.0 Gecko/2008090512 Firefox/3.0.2');
my $browser      = LWP::UserAgent->new();

# the listing takes a numeric side: 1 when --side is exactly "Alliance",
# 2 for everything else (which includes the default, "Horde").
my $side_filter;
if ($options{'side'} eq 'Alliance') { $side_filter = '?side=1'; }
else                                { $side_filter = '?side=2'; }

# we use allakhazam's master listing of quests because it'll actually be
# cheaper, bandwidth-wise, than hitting each individual zone + "next" pages.
my %quests; # master list of all quests, keyed by category name.

# paging state, in order: quests shown per page on the site, the page we're
# currently on (counted from 0), and the final page. 999 is a placeholder
# meaning "not discovered yet"; the real value is scraped from the first page.
my ($quests_per_page, $current_page, $last_page) = (75, 0, 999);

# grab all quests until the last page. each page is fetched, its listing table
# is pulled apart by HTML::TableExtract, and every row is filed into %quests
# under its category name.
while ($current_page <= $last_page) {
  my $current_url = $quests_url . $side_filter . '&start=' . ($current_page * $quests_per_page);
  my $current_data = $browser->get($current_url, %headers);
  until ($current_data->is_success) { # keep trying until success.
    print STDERR "Download failed. Retrying current page in 30 seconds.\n";
    sleep(30); $current_data = $browser->get($current_url, %headers);
  }

  # we only do this once so it won't fail on the last page (which doesn't match
  # the regex). 999 is the "not discovered yet" placeholder. if the pattern is
  # missing entirely we stop after this page instead of leaving $last_page
  # undefined, which would only produce warnings before ending the loop anyway.
  if ($last_page == 999) {
    my ($found_last) = $current_data->content =~ m!">(\d{2,})</a></td>!;
    if (defined $found_last) { $last_page = $found_last; }
    else {
      print STDERR "Could not find the last page number; stopping after page $current_page.\n";
      $last_page = $current_page;
    }
  }

  # progress report on every tenth page (0, 10, 20, ...).
  if ($current_page % 10 == 0) { print STDERR "Downloading quest data (page $current_page of $last_page)...\n"; }

  # column headers to match, in the order the row slots are indexed below
  # (0 = Quest ... 6 = Tags). keep_html leaves the cell markup intact because
  # the links inside the cells are needed later.
  my $te_headers = ["Quest","Level","Side","Start","Reward","Category","Tags"];
  my %te_config  = (keep_html => 1, strip_html_on_match => 0); # bah.
  my $te = HTML::TableExtract->new(headers => $te_headers, %te_config);
  $te->parse($current_data->content);
  foreach my $ts ($te->table_states) {
    foreach my $r ($ts->rows) {
      # empty cells can come back undefined; treat them as empty strings so
      # the matches below don't warn under -w.
      $r->[0] = '' unless defined $r->[0];
      $r->[5] = '' unless defined $r->[5];

      # category cell: remember the link target, then reduce it to bare text.
      my ($category_url) = $r->[5] =~ m!<a href="(.*?)">.*?</a>!;
      $r->[5] =~ s/<(.*?)>//g; my $category_name = $r->[5];
      $category_url = $category_url ? $category_url : '';

      # create this new category the first time we meet it...
      $quests{$category_name} ||= { 'name' => $category_name, 'url' => $category_url };

      # ...and stash the plain-text quest title in an extra slot (7) to ease
      # sorting later. an unmatched title sorts as the empty string.
      my ($sort_name) = $r->[0] =~ m!<a href=".*?>(.*?) ?</a>!;
      $r->[7] = defined $sort_name ? $sort_name : '';
      push (@{$quests{$category_name}{'quests'}}, $r);
    }
  }

  $current_page++;
}

# oh, how I do love to keep useless stats. MmMMm.
# the *_category counters are reset after every category; $completed_total and
# $total_side accumulate across the whole run for the closing summary.
print STDERR "Processing downloaded quest data...\n";
my $completed_category = 0; my $completed_total = 0; my $total_category = 0; my $total_side = 0;

print header();
foreach my $category (sort { $quests{$a}->{'name'} cmp $quests{$b}->{'name'} } keys %quests) {
  print "\n<h2><a href=\"$root_url$quests{$category}{url}\">$quests{$category}{name}</a></h2>\n";
  print "<table><tr><th></th><th>ID</th><th>Name</th><th>Category</th>".
        "<th>Level</th><th>Start NPC</th><th>Side</th></tr>\n";

  # row slots: 0 = quest link, 1 = level, 2 = side, 3 = start NPC,
  # 5 = category text, 7 = plain-text title stored at download time for sorting.
  foreach my $quest (sort { $a->[7] cmp $b->[7] } @{$quests{$category}{'quests'}}) {
    my $s = ''; # will contain an asterisk for "we did it!"
    my ($id) = $quest->[0] =~ m!wquest=(\d+)!;
    next unless $id; # no quest ID in the link, nothing to track.

    $quest->[0] =~ s/"> <\/a>/">???<\/a>/; # no Name, but link?
    $quest->[0] =~ s/(\/db.*)/$root_url$1/g; # make a full URL.
    $quest->[0] =~ s/<img (.*?)>//gi; # no images please. ty.
    $quest->[2] = $quest->[2] ? $quest->[2] : '???'; # no Side?
    $quest->[3] = $quest->[3] ? $quest->[3] : '???'; # no NPC?
    $quest->[3] =~ s/(\/db.*)/$root_url$1/g; # make a full URL.

    # stats are taken before the level filter, so the counts (and the record
    # of which completed IDs exist on the site) include rows hidden below.
    $total_category++; # we can do this quest, so count it, eh?
    if ($completed{$id}) { $completed_category++; $s = '*'; }
    if ($completed{$id}) { $found_completed{$id}++; } # later.

    # level filter. both bounds are inclusive: --minlevel 25 --maxlevel 35
    # lists levels 25 through 35, and the default minimum of 1 keeps level-1
    # quests. quests marked -1 have no real level and are never filtered.
    if ($quest->[1] != -1) { # some quests are set as -1.
      next if $quest->[1] < $options{'minlevel'};
      next if $quest->[1] > $options{'maxlevel'};
    }

    print "<tr><td class=\"c\">$s</td><td class=\"c\">$id</td>".
      "<td>$quest->[0]</td><td>$quest->[5]</td><td class=\"c\" width=\"35\">$quest->[1]</td>".
      "<td>$quest->[3]</td><td class=\"c\" width=\"8\">$quest->[2]</td></tr>\n";
  }
  print "</table>"; # all done this category, so print out some stats.
  print "<p>Listed quests matching our side: $total_category. "; # stat one. mmMm.
  print "Quests completed for this category: $completed_category.</p>\n"; # stat two.

  # roll this category into the running totals and start the next one at zero.
  $completed_total += $completed_category; $completed_category = 0;
  $total_side += $total_category; $total_category = 0;
}

# every ID the player listed (%completed) should have turned up while walking
# the site's data (%found_completed). anything that didn't is reported on
# STDERR as a possible mistake in the player's file or the remote data.
print STDERR "Looking for quest IDs not in active data (potential error in data).\n";
foreach my $reported_id (sort keys %completed) {
  next if $found_completed{$reported_id};
  print STDERR "Quest ID $reported_id not found in active data.\n";
}

# grand totals, then close the HTML document. cos counting is fun.
print "\n<p>Total listed quests matching our side: $total_side. " .
      "Total quests completed: $completed_total.</p>";
print STDERR "Done.\n";
print footer();

################################################################
# HTML headers and footers. nothing too exciting here.         #
################################################################
# Builds the opening of the HTML report: doctype, stylesheet, the row-striping
# script, the page title, and an intro paragraph. The chosen side
# ($options{side}) and the current local time are interpolated into the text.
# Takes no arguments and returns the whole thing as one string.
sub header {
  my $updated = scalar localtime;
  my $html = <<"END_OF_HEADER_HTML";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head><title>World of Warcraft Quest Tracker</title>
 <style type="text/css"><!--
  body { font-size:11px;margin:1em;font-family:arial,sans-serif; }
  a { text-decoration:none; } tr.even td { background-color:#edf3fe; }
  th { border:1px solid rgb(196,196,196);background-color:rgb(248,248,248); }
  table { border:1px;margin-left:5px;margin-right:5px;padding:2px;width:98% }
  tr, td { border:1px solid rgb(235,235,235);border-top:0px; }
  p { margin-left:5px;margin-top:0; } .c { text-align:center; }
  h1, h2, h3 { background-color:transparent;color:#001080; }
 //--></style>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <script type="text/javascript"><!--
  onload = function() { stripe(); };
  function stripe() {
   var tables = document.getElementsByTagName("table");
    for (var i = 0; i < tables.length; i += 1) {
     var trs = tables[i].getElementsByTagName("tr");
     for (var j = 0; j < trs.length; j += 2) {
      trs[j].className += " even";
     }
    }
  }
 // --></script>
</head><body><h1>World of Warcraft Quest Tracker ($options{side})</h1>
<p style="font-size:1.4em;">The below contains a listing for all known World
of Warcraft quests available to the $options{side}, and was last generated
$updated. It was <a href="http://www.disobey.com/d/code/wowquests.pl">created
by a Perl script from Morbus Iff</a> that collects quest and category data from the
<a href="http://wow.allakhazam.com/">Allakhazam World of Warcraft site</a>. Quests
with asterisks next to them have been completed for this particular character.</p>
END_OF_HEADER_HTML
  return $html;
}

# Returns the markup that closes the body and html elements of the report.
sub footer {
  my $closing_tags = "</body></html>";
  return $closing_tags;
}