#!/usr/bin/perl -w
use strict; $|++;
use HTML::TableExtract;
use Getopt::Long;
use LWP;
###############################################################################
# Generates a tabular list of all known World of Warcraft quests, checking    #
# off those you've completed. Also hides quests that aren't completable by    #
# your side, and separates everything by zone. Uses data from Allakhazam.     #
#                                                                             #
# v1.3: 2007-07-21, morbus@disobey.com, email me if you use/modify.           #
###############################################################################
# To run this script, you'll need Perl and HTML::TableExtract. You'll also    #
# need an external file of completed quests, using the quest IDs from the     #
# wow.allakhazam.com site, one per line. Run the script as follows:           #
#                                                                             #
#   perl wowquests.pl qids.txt > wowquests.html                               #
#   perl wowquests.pl --side Alliance qids.txt > wowquests.html               #
#                                                                             #
# You can also filter by minimum and maximum levels - pass the command line   #
# args --minlevel 25 and --maxlevel 35 to get a list keyed to said range.     #
#                                                                             #
# The default side is "Horde". For the Horde! (Ha, ha! I'm cheesy! Wheee!)    #
###############################################################################
# changes (2007-07-21, version 1.3):                                          #
#   - min/max level filtering (thanks Edward Barton).                         #
#                                                                             #
# changes (2007-02-02, version 1.2):                                          #
#   - removed "Unknown" categories/zones.                                     #
#   - checks reported quest IDs vs. active quest IDs.                         #
#   - 30 second wait, then re-request, if zone URL returns no data.           #
#   - removed "Updated" images that showed up next to quest titles.           #
#                                                                             #
# changes (2006-02-09, version 1.1):                                          #
#   - we now keep track of quests available per zone and total.               #
#   - added initial support for Allakhazam's "Special Category" quests.       #
#   - added support for some of the unlisted "Unknown" categories/zones.      #
#   - print current zone to STDERR so we have a progress report.              #
#                                                                             #
# changes (2005-11-14, version 1.0):                                          #
#   - initial public release.                                                 #
###############################################################################

# Usage text shared by every "you called me wrong" failure below.
my $usage = "Usage: $0 [--side SIDE] [--minlevel N] [--maxlevel N] qids.txt\n";

# Command-line options: which side is this character on, and which quest
# levels should be listed? GetOptions returns false on an unparsable option,
# in which case carrying on with half-applied settings would be misleading.
my %options = ( 'minlevel' => 1, 'maxlevel' => 99, 'side' => 'Horde' );
GetOptions(\%options, 'minlevel=i', 'maxlevel=i', 'side=s') or die $usage;

# Load the file of completed quest IDs, one ID per line. The file name is
# the first remaining command-line argument.
my $quests_file = shift;
die $usage unless defined $quests_file;
open(my $quests_fh, '<', $quests_file)
    or die "There was a quests file error: $!";
my %completed;
while (my $line = <$quests_fh>) {
    # Strip the newline plus any stray whitespace or carriage returns, so
    # that files saved with CRLF line endings still yield clean IDs.
    $line =~ s/^\s+|\s+$//g;
    next unless length $line;   # blank lines are not quest IDs.
    $completed{$line}++;
}
close($quests_fh);
my %found_completed; # completed quest IDs found on Alla (for later comparison).

# HTTP client, request headers and the URLs everything else hangs off.
my $browser      = LWP::UserAgent->new;
my %headers      = ('User-Agent' => 'Mozilla/5.0 Gecko/20051107 Firefox/1.5');
my $zones_url    = 'http://wow.allakhazam.com/db/qzone.html?x';
my $specials_url = 'http://wow.allakhazam.com/db/qspecial.html?x';
my $root_url     = 'http://wow.allakhazam.com';

# A quest is "ours" if its Side column names our side, both sides, or '?'.
my $side_regexp = qr/($options{side}|BothSides|\?)/;
my $minlevel    = $options{'minlevel'};
my $maxlevel    = $options{'maxlevel'};

# Discover every regular zone from the zone index page. Without a usable
# index there is nothing to report on, so a failed fetch is fatal.
my @zone_urls; # will hold all the zones discovered.
my $zones_response = $browser->get($zones_url, %headers);
die "Could not fetch $zones_url: " . $zones_response->status_line . "\n"
    unless $zones_response->is_success;
my $zones_content = $zones_response->content;
while ($zones_content =~ m{(/db/qlookup\.html\?zone=\d+?)">(.*?)</a>}g) {
    my ($path, $name) = ($1, $2);
    next if $name eq 'Darkmoon Faire'; # use "Special Category" version.
    push(@zone_urls, { name => $name, url => "$root_url$path" });
}

# Now load in the "Special Category" quests the same way.
my $specials_response = $browser->get($specials_url, %headers);
die "Could not fetch $specials_url: " . $specials_response->status_line . "\n"
    unless $specials_response->is_success;
my $specials_content = $specials_response->content;
while ($specials_content =~ m{(/db/qlookup\.html\?special=-?\d+?)">(.*?)</a>}g) {
    my ($path, $name) = ($1, $2);
    push(@zone_urls, { name => $name, url => "$root_url$path" });
}

# Sort the zones from our different groups alphabetically (case-insensitive).
@zone_urls = sort { lc $a->{'name'} cmp lc $b->{'name'} } @zone_urls;

# Running counters, per zone and overall:
#   x_* = quests skipped because they belong to the other side,
#   c_* = quests we have completed,
#   o_* = quests available to our side.
my $x_zone = 0; my $x_total = 0;
my $c_zone = 0; my $c_total = 0;
my $o_zone = 0; my $o_total = 0;

# We've got all our zones, so start the report.
print header(); # HTML header.
# Walk every discovered zone: fetch its quest listing, print one HTML table
# of the quests our side can do, and follow it with per-zone statistics.
foreach my $zone (@zone_urls) {
    my $zone_content = $browser->get($zone->{'url'}, %headers);
    until ($zone_content->is_success) { # keep trying until success, eh?
        print STDERR "Retrying $zone->{name} in 30 seconds.\n";
        sleep(30);
        $zone_content = $browser->get($zone->{'url'}, %headers);
    }

    # Columns come back in the order the headers are listed here:
    # 0 = Name, 1 = Starts in, 2 = Side, 3 = Level, 4 = Start Npc.
    my @te_headers = ("Name", "Starts in", "Side", "Level", "Start Npc");
    my %te_config  = (keep_html => 1, strip_html_on_match => 0);
    my $te = HTML::TableExtract->new(headers => \@te_headers, %te_config);
    $te->parse($zone_content->content);
    print STDERR "Finding quests in $zone->{name}.\n";

    foreach my $ts ($te->table_states) {
        print "\n<h2><a href=\"$zone->{url}\">$zone->{name}</a></h2>\n";
        print "<table><tr><th></th><th>ID</th><th>Name</th><th>Starts in</th>".
              "<th>Level</th><th>Start NPC</th><th>Side</th></tr>\n";

        foreach my $r ($ts->rows) {
            my ($name, $starts_in, $side, $level, $npc) = @$r;
            $name      = '' unless defined $name;
            $starts_in = '' unless defined $starts_in;
            $level     = '' unless defined $level;

            # Take the quest ID from this row's own match only. Reading $1
            # after a failed match would hand us a stale capture from some
            # earlier, unrelated match and turn a junk row into a "quest".
            my ($q) = $name =~ /wquest=(\d+)/;
            next unless $q;

            $name =~ s/(\/db.*)/$root_url$1/g;  # make quest links absolute.
            $name =~ s/<img (.*?)>//gi;         # no <img>.
            $side = '?' unless $side;           # no side? Assume anyone.
            $npc  = '?' unless $npc;            # no NPC? Whoops.

            if ($side !~ /$side_regexp/) { $x_zone++; next; }
            $o_zone++; # we can do this quest, so count it, eh?

            # Completions are recorded before the level filter on purpose:
            # the discrepancy report at the end must see every completed ID
            # that exists on the site, not just those in the level range.
            my $s = '';
            if ($completed{$q}) {
                $c_zone++;
                $s = '*';
                $found_completed{$q}++;
            }

            # Level filter. Both bounds are inclusive, so --minlevel 25
            # --maxlevel 35 lists levels 25 through 35, and the defaults
            # (1..99) no longer hide level-1 quests. keep_html is on, so
            # drop any markup before looking for the number; a cell with no
            # number counts as level 0.
            (my $level_text = $level) =~ s/<[^>]*>//g;
            my ($level_num) = $level_text =~ /(\d+)/;
            $level_num = 0 unless defined $level_num;
            next if $level_num < $minlevel;
            next if $level_num > $maxlevel;

            print "<tr><td class=\"c\">$s</td><td class=\"c\">$q</td>".
                  "<td>$name</td><td>$starts_in</td><td class=\"c\" width=\"35\">$level</td>".
                  "<td>$npc</td><td class=\"c\" width=\"8\">$side</td></tr>\n";
        }

        print "</table>"; # all done this zone, so print out some stats.
        print "<p>Unlisted quests not matching our side: $x_zone. ";
        print "Listed quests matching our side: $o_zone. ";
        print "Quests completed for this zone: $c_zone.</p>\n";

        # Fold the zone counters into the totals and reset them.
        $x_total += $x_zone; $c_total += $c_zone; $o_total += $o_zone;
        $x_zone = $c_zone = $o_zone = 0;
        last; # we only care about quests STARTING here.
    }
}

# Check reported player completions (in %completed) vs. the quest IDs we've
# found (in %found_completed) on the remote site and report discrepancies.
print STDERR "Looking for quest IDs not in active data (probably error in data).\n";
foreach my $qid (sort keys %completed) {
    next if $found_completed{$qid};
    print STDERR "Quest ID $qid not found in active data.\n";
}

# HTML footer and final counts. cos counting is fun.
print "\n<p>Total unlisted quests not matching our side: $x_total. ";
print "Total listed quests matching our side: $o_total. ";
print "Total quests completed: $c_total.</p>";
print footer();

################################################################
# HTML headers and footers. nothing too exciting here.         #
################################################################

# Returns the opening HTML of the report: doctype, inline CSS, the
# row-striping script, the page title and an intro paragraph. Reads the
# file-level %options for the side name and stamps the current local time.
sub header {
    my $updated = localtime(time);

    return <<EVIL_HEREDOC_HEADER_OF_ORMS_BY_GORE;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head><title>World of Warcraft Quest Tracker</title>
<style type="text/css"><!--
body { font-size:11px;margin:1em;font-family:arial,sans-serif; }
a { text-decoration:none; }
tr.even td { background-color:#edf3fe; }
th { border:1px solid rgb(196,196,196);background-color:rgb(248,248,248); }
table { border:1px;margin-left:5px;margin-right:5px;padding:2px;width:98% }
tr, td { border:1px solid rgb(235,235,235);border-top:0px; }
p { margin-left:5px;margin-top:0; }
.c { text-align:center; }
h1, h2, h3 { background-color:transparent;color:#001080; }
//--></style>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<script type="text/javascript"><!--
onload = function() { stripe(); };
function stripe() {
  var tables = document.getElementsByTagName("table");
  for (var i = 0; i < tables.length; i += 1) {
    var trs = tables[i].getElementsByTagName("tr");
    for (var j = 0; j < trs.length; j += 2) {
      trs[j].className += " even";
    }
  }
}
// --></script>
</head><body><h1>World of Warcraft Quest Tracker ($options{side})</h1>
<p style="font-size:1.4em;">The below contains a listing for all known World of
Warcraft quests available to the $options{side}, and was last generated $updated.
It was <a href="http://www.disobey.com/d/code/wowquests.pl">created by a Perl
script from Morbus Iff</a> that collects quest and zone data from the
<a href="http://wow.allakhazam.com/">Allakhazam World of Warcraft site</a>.
Quests with asterisks next to them have been completed for this particular
character.</p>
EVIL_HEREDOC_HEADER_OF_ORMS_BY_GORE
}

# Returns the closing HTML of the report.
sub footer {
    return "</body></html>";
}