#!/usr/bin/perl
###############################################################################
# Integrates with Delicious Library and scrapes data from comics.org          #
#                                                                             #
# v1.0: 2007-11-06, morbus@disobey.com, email me if you use/modify.           #
###############################################################################
# This script requires a healthy dose of external dependencies. Please read   #
# http://www.disobey.com/node/1820 for this pre-configuration and history.    #
###############################################################################
# changes (2007-11-06, version 1.0):                                          #
# - initial public release.                                                   #
###############################################################################
use warnings;
use strict;
use CGI qw/:standard/;
use LWP::Simple;

# NOTE(review): the body emitted below is XML, but the header has always
# announced text/html; left untouched because the consumer's expectations
# are not visible from this script -- confirm before changing.
print "Content-type: text/html\n\n";

# delicious library sends us B0xxx69268: a "B0" marker, one or more "x"
# characters of padding, then the comics.org id. anything else is not for us.
my $item_id = param('ItemId');
exit if !defined($item_id) || $item_id !~ /B0x{1,}/;

my %gcd_data; # holder of goodies used in final xml.

# strip the marker/padding, leaving just the comics.org id.
(my $gcd_id = $item_id) =~ s/B0x{1,}//;

# the id is interpolated straight into the lookup URLs below, so refuse
# anything that could alter the query string (&, #, whitespace, etc.).
exit if $gcd_id !~ /\A[A-Za-z0-9]+\z/;

my $gcd_lookup = 'http://comics.org/details.lasso?id='. $gcd_id;
my $gcd_dump   = get($gcd_lookup); # 'ere she goes, keptin.

# LWP::Simple::get returns undef when the fetch fails; fall back to an empty
# page so the matches below simply find nothing instead of warning.
$gcd_dump = '' unless defined $gcd_dump;
$gcd_dump =~ tr/\n\r//d; # strip all newlines.

# and start slurping in relevant data by any means necessary.
# each match is evaluated in list context, so a failed match leaves the
# corresponding field undefined (defaulted to '' further down).
# NOTE(review): the title capture is greedy, unlike its siblings; if the page
# contains a later "</b></font></td>" it will over-capture -- verify against
# a live page before tightening it to (.*?).
($gcd_data{'title'})        = $gcd_dump =~ m!<font size="5".*?face=".*?"><b>(.*)</b></font></td>!;
($gcd_data{'publisher'})    = $gcd_dump =~ m!- (.*?), .*? ?\d{4}, coverprice!;
($gcd_data{'release_date'}) = $gcd_dump =~ m!.*?, (.*? ?\d{4}), coverprice!;
($gcd_data{'price_retail'}) = $gcd_dump =~ m!coverprice (\d+\.\d+)!;
($gcd_data{'genre'})        = $gcd_dump =~ m!Genre:</b> (.*?)</font>!;
($gcd_data{'pages'})        = $gcd_dump =~ m!(\d{1,}) pages!;

# now, get the cover image URL.
my $gcd_image      = 'http://comics.org/coverview.lasso?id='. $gcd_id .'&zoom=4';
my $gcd_image_dump = get($gcd_image);
$gcd_image_dump = '' unless defined $gcd_image_dump; # same undef-on-failure guard.

# this is a quickie lookup to fill in LargeImage:URL.
($gcd_data{'image'}) = $gcd_image_dump =~ m!<img src="(.*?)"!;

# the scraped values are raw HTML fragments: make every one of them defined
# and safe to drop into XML character data before building the response.
for my $field (qw/title publisher release_date price_retail genre pages image/) {
    $gcd_data{$field} = _xml_escape($gcd_data{$field});
}

# yep. that's about it, bub.
print <<EVIL_HEREDOC_OF_ORMS_BY_GORE;
<?xml version="1.0" encoding="utf-8"?>
<ItemLookupResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-07-26">
  <Items>
    <Item>
      <LargeImage>
        <URL>$gcd_data{image}</URL>
      </LargeImage>
      <ItemAttributes>
        <Binding>Comic Book</Binding>
        <ListPrice>
          <FormattedPrice>\$$gcd_data{price_retail}</FormattedPrice>
        </ListPrice>
        <Publisher>$gcd_data{publisher}</Publisher>
        <ReleaseDate>$gcd_data{release_date}</ReleaseDate>
        <Title>$gcd_data{title}</Title>
        <NumberOfPages>$gcd_data{pages}</NumberOfPages>
      </ItemAttributes>
      <EditorialReviews>
        <EditorialReview>
          <Content></Content>
        </EditorialReview>
      </EditorialReviews>
      <BrowseNodes>
        <BrowseNode>
          <Name>$gcd_data{genre}</Name>
        </BrowseNode>
      </BrowseNodes>
    </Item>
  </Items>
</ItemLookupResponse>
EVIL_HEREDOC_OF_ORMS_BY_GORE

# _xml_escape($text): returns $text made safe for XML character data.
#  - undef becomes the empty string.
#  - "&" is escaped unless it already starts one of the five predefined XML
#    entities or a numeric character reference, so text that is already
#    escaped is not double-escaped, while HTML-only entities (e.g. &nbsp;)
#    no longer make the document ill-formed.
#  - "<" and ">" are always escaped.
sub _xml_escape {
    my ($text) = @_;
    return '' unless defined $text;

    $text =~ s/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    return $text;
}