#!/usr/bin/perl -w
use strict;
use warnings;
$|++;
use HTML::TableExtract;
use Getopt::Long;
use LWP;

###############################################################################
# Generates a tabular list of all known World of Warcraft quests, checking
# off those you've completed. Also hides quests that aren't completable by
# your side, and separates everything by category. Data from Allakhazam.
#
# v1.5: 2008-10-28, morbus@disobey.com, email me if you use/modify.
###############################################################################
# To run this script, you'll need Perl and HTML::TableExtract. You'll also
# need an external file of completed quests, using the quest IDs from the
# wow.allakhazam.com site, one per line. Run the script as follows:
#
#   perl wowquests.pl qids.txt > wowquests.html
#   perl wowquests.pl --side Alliance qids.txt > wowquests.html
#
# You can also filter by minimum and maximum levels - pass the command line
# args --minlevel 25 and --maxlevel 35 to get a list keyed to said range
# (both bounds are inclusive).
#
# The default side is "Horde". For the Horde! (Ha, ha! I'm cheesy! Wheee!)
###############################################################################
# changes (2008-10-28, version 1.5):
#  - added back in all the "broken" Seasonal entries.
#  - Start NPC URLs are working again.
#
# changes (2008-10-22, version 1.4):
#  - updated to the latest version of Allakhazam's site.
#  - fixed bug where level filters missed -1 leveled quests.
#  - ignore a healthy dose of broken "Seasonal" database entries.
#
# changes (2007-07-21, version 1.3):
#  - min/max level filtering (thanks Edward Barton).
#
# changes (2007-02-02, version 1.2):
#  - removed "Unknown" categories/zones.
#  - checks reported quest IDs vs. active quest IDs.
#  - 30 second wait, then re-request, if zone URL returns no data.
#  - removed "Updated" images that showed up next to quest titles.
#
# changes (2006-02-09, version 1.1):
#  - we now keep track of quests available per zone and total.
#  - added initial support for Allakhazam's "Special Category" quests.
#  - added support for some of the unlisted "Unknown" categories/zones.
#  - print current zone to STDERR so we have a progress report.
#
# changes (2005-11-14, version 1.0):
#  - initial public release.
###############################################################################

my $usage = "Usage: perl wowquests.pl [--side Horde|Alliance] "
          . "[--minlevel N] [--maxlevel N] qids.txt\n";

# which side is this character, and which level range do we list?
my %options = ( 'minlevel' => 1, 'maxlevel' => 999, 'side' => 'Horde' );
GetOptions(\%options, 'minlevel=i', 'maxlevel=i', 'side=s') or die $usage;

# accept the side in any letter case; anything unrecognised used to be
# silently treated as Horde, so keep that fallback but say so out loud and
# make the page title agree with the data we actually request.
if ($options{'side'} =~ /^alliance$/i) {
    $options{'side'} = 'Alliance';
}
elsif ($options{'side'} =~ /^horde$/i) {
    $options{'side'} = 'Horde';
}
else {
    print STDERR 'Unknown side "' . $options{'side'} . '"; falling back to Horde.' . "\n";
    $options{'side'} = 'Horde';
}

# load in the quest file full of qids per newline. surrounding whitespace
# (including the \r of CRLF files) is stripped and blank lines are ignored so
# that every key in %completed is a usable quest ID.
my $quests_file = shift @ARGV;
die $usage unless defined $quests_file;
open(my $quests_fh, '<', $quests_file)
    or die "There was a quests file error: $!";
my %completed;
while (my $qid = <$quests_fh>) {
    $qid =~ s/^\s+//;
    $qid =~ s/\s+$//;
    next unless length $qid;
    $completed{$qid}++;
}
close($quests_fh);
my %found_completed; # completed quest IDs found on Alla (for later comparison).

# all our starting init crap. yawn.
my $browser     = LWP::UserAgent->new;
my %headers     = ('User-Agent' => 'Mozilla/5.0 Gecko/2008090512 Firefox/3.0.2');
my $root_url    = 'http://wow.allakhazam.com';
my $quests_url  = $root_url . '/db/questlist.html';
my $side_filter = '?side=' . ($options{'side'} eq 'Alliance' ? 1 : 2);

# we use allakhazam's master listing of quests because it'll actually be
# cheaper, bandwidth-wise, than hitting each individual zone + "next" pages.
my %quests;                # master list of all quests sorted by category. this'll be fun.
my $quests_per_page = 75;  # number of quests per single page on the site.
my $current_page    = 0;   # current page that we're viewing; starts at 0.
my $last_page       = 999; # placeholder until the first page tells us the real value.
my $last_page_known = 0;   # true once $last_page has been read from the site.

# table extraction settings never change between pages, so build them once.
my $te_headers = ["Quest","Level","Side","Start","Reward","Category","Tags"];
my %te_config  = (keep_html => 1, strip_html_on_match => 0); # bah.

# grab all quests until the last page.
while ($current_page <= $last_page) {
    my $current_url = $quests_url . $side_filter . '&start=' . ($current_page * $quests_per_page);
    my $current_data = $browser->get($current_url, %headers); # ^^ OMG, MORBUS WROTE SOME MATH!
    until ($current_data->is_success) { # keep trying until success.
        print STDERR "Download failed. Retrying current page in 30 seconds.\n";
        sleep(30);
        $current_data = $browser->get($current_url, %headers);
    }
    my $content = $current_data->content;

    # we only do this once so it won't fail on the last page (which doesn't
    # match the regex). if even the first page doesn't match, treat the page
    # we're on as the last one instead of looping on an undefined value.
    unless ($last_page_known) {
        my ($found_last) = $content =~ m!">(\d{2,})</a></td>!;
        $last_page       = defined $found_last ? $found_last : $current_page;
        $last_page_known = 1;
    }

    # progress report on every tenth page.
    if ($current_page % 10 == 0) {
        print STDERR "Downloading quest data (page $current_page of $last_page)...\n";
    }

    my $te = HTML::TableExtract->new(headers => $te_headers, %te_config);
    $te->parse($content);
    foreach my $ts ($te->table_states) {
        foreach my $r ($ts->rows) {
            # column 5 is the Category cell: pull its link, then strip the HTML.
            my $category_cell = defined $r->[5] ? $r->[5] : '';
            my ($category_url) = $category_cell =~ m!<a href="(.*?)">.*?</a>!;
            $category_url = '' unless $category_url;
            (my $category_name = $category_cell) =~ s/<(.*?)>//g;
            $r->[5] = $category_name;

            if (!$quests{$category_name}) { # create this new category...
                $quests{$category_name} = {
                    'name' => $category_name,
                    'url'  => $category_url,
                };
            }

            # which we'll deliciously fill with quests. MmmM. yumMMmy quests.
            # slot 7 holds the bare quest title, purely to ease sorting.
            $r->[0] = '' unless defined $r->[0];
            my ($sort_name) = $r->[0] =~ m!<a href=".*?>(.*?) ?</a>!;
            $r->[7] = defined $sort_name ? $sort_name : '';
            push @{ $quests{$category_name}{'quests'} }, $r;
        }
    }

    $current_page++;
}

# oh, how I do love to keep useless stats. MmMMm.
print STDERR "Processing downloaded quest data...\n";
my $completed_category = 0;
my $completed_total    = 0;
my $total_category     = 0;
my $total_side         = 0;

print header();
foreach my $category (sort { $quests{$a}->{'name'} cmp $quests{$b}->{'name'} } keys %quests) {
    print "\n<h2><a href=\"$root_url$quests{$category}{url}\">$quests{$category}{name}</a></h2>\n";
    print "<table><tr><th></th><th>ID</th><th>Name</th><th>Category</th>".
          "<th>Level</th><th>Start NPC</th><th>Side</th></tr>\n";

    foreach my $quest (sort { $a->[7] cmp $b->[7] } @{ $quests{$category}{'quests'} }) {
        my $s = ''; # will contain an asterisk for "we did it!"
        my ($id) = $quest->[0] =~ m!wquest=(\d+)!;
        next unless $id;

        $quest->[0] =~ s/"> <\/a>/">???<\/a>/;            # no Name, but link?
        $quest->[0] =~ s/(\/db.*)/$root_url$1/g;          # make a full URL.
        $quest->[0] =~ s/<img (.*?)>//gi;                 # no images please. ty.
        $quest->[2] = $quest->[2] ? $quest->[2] : '???';  # no Side?
        $quest->[3] = $quest->[3] ? $quest->[3] : '???';  # no NPC?
        $quest->[3] =~ s/(\/db.*)/$root_url$1/g;          # make a full URL.

        # NOTE: the counters are bumped before the level filter below, so the
        # per-category and overall totals include quests hidden by the filter.
        $total_category++; # we can do this quest, so count it, eh?
        if ($completed{$id}) {
            $completed_category++;
            $s = '*';
            $found_completed{$id}++; # later.
        }

        # level filter; both bounds are inclusive. quests marked -1, or whose
        # level can't be read as a number, are never filtered out.
        my $level = defined $quest->[1] ? $quest->[1] : '';
        if ($level =~ /^\s*(-?\d+)/) {
            my $numeric_level = $1;
            if ($numeric_level != -1) {
                next if $numeric_level < $options{'minlevel'};
                next if $numeric_level > $options{'maxlevel'};
            }
        }

        print "<tr><td class=\"c\">$s</td><td class=\"c\">$id</td>".
              "<td>$quest->[0]</td><td>$quest->[5]</td><td class=\"c\" width=\"35\">$level</td>".
              "<td>$quest->[3]</td><td class=\"c\" width=\"8\">$quest->[2]</td></tr>\n";
    }
    print "</table>";

    # all done this category, so print out some stats.
    print "<p>Listed quests matching our side: $total_category. ";       # stat one. mmMm.
    print "Quests completed for this category: $completed_category.</p>\n"; # stat two.
    $completed_total += $completed_category; $completed_category = 0;     # SupPpeERR.
    $total_side      += $total_category;     $total_category     = 0;     # BOoirring.
}

# check reported player completions (in %completed) vs. the quest IDs we've
# found (in %found_completed) on the remote site and report discrepancies.
print STDERR "Looking for quest IDs not in active data (potential error in data).\n";

# report numeric IDs in numeric order (so 99 comes before 1000); anything
# that isn't a plain number is listed afterwards in string order.
my @missing_ids     = grep { !$found_completed{$_} } keys %completed;
my @missing_numeric = sort { $a <=> $b } grep {  /^\d+$/ } @missing_ids;
my @missing_other   = sort { $a cmp $b } grep { !/^\d+$/ } @missing_ids;
foreach my $missing_id (@missing_numeric, @missing_other) {
    print STDERR "Quest ID $missing_id not found in active data.\n";
}

# HTML footer and final counts. cos counting is fun.
print "\n<p>Total listed quests matching our side: $total_side. ";
print "Total quests completed: $completed_total.</p>";
print STDERR "Done.\n";
print footer();

################################################################
# HTML headers and footers. nothing too exciting here.         #
################################################################

# header(): returns the opening HTML of the report as one string - doctype,
# stylesheet, the row-striping JavaScript, the <h1> title and the intro
# paragraph. Takes no arguments. Interpolates $options{side} from the
# file-level %options hash and the current local time.
sub header {
    my $updated = localtime(time); # scalar context: a human-readable timestamp.

    # the "<!--" and "// -->" markers must stay on lines of their own,
    # otherwise the script body would be swallowed as a one-line comment.
    return <<"END_OF_HEADER_HTML";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head><title>World of Warcraft Quest Tracker</title>
<style type="text/css"><!--
body { font-size:11px;margin:1em;font-family:arial,sans-serif; }
a { text-decoration:none; }
tr.even td { background-color:#edf3fe; }
th { border:1px solid rgb(196,196,196);background-color:rgb(248,248,248); }
table { border:1px;margin-left:5px;margin-right:5px;padding:2px;width:98% }
tr, td { border:1px solid rgb(235,235,235);border-top:0px; }
p { margin-left:5px;margin-top:0; }
.c { text-align:center; }
h1, h2, h3 { background-color:transparent;color:#001080; }
//--></style>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<script type="text/javascript"><!--
onload = function() { stripe(); };
function stripe() {
   var tables = document.getElementsByTagName("table");
   for (var i = 0; i < tables.length; i += 1) {
      var trs = tables[i].getElementsByTagName("tr");
      for (var j = 0; j < trs.length; j += 2) {
         trs[j].className += " even";
      }
   }
}
// --></script>
</head><body><h1>World of Warcraft Quest Tracker ($options{side})</h1>
<p style="font-size:1.4em;">The below contains a listing for all known
World of Warcraft quests available to the
$options{side}, and was last generated $updated. It was
<a href="http://www.disobey.com/d/code/wowquests.pl">created by a Perl
script from Morbus Iff</a> that collects quest and category data from the
<a href="http://wow.allakhazam.com/">Allakhazam World of Warcraft site</a>.
Quests with asterisks next to them have been completed for this
particular character.</p>
END_OF_HEADER_HTML
}

# footer(): returns the closing HTML tags of the report. Takes no arguments.
sub footer {
    return "</body></html>";
}