#! /usr/bin/perl
# Autofetch category rank
#
# Originally by Ben Chatelain (http://benchatelain.com/2009/03/05/scraping-app-store-rankings-around-the-world/)
#
# Modified	2009/05/25	Rob Terrell (http://touchcentric.com)
#	accepts command-line arguments for appId, category, and free/paid
#	returns just the ranking integer or "n/a" to better integrate into workflows
#	caches data for increased speed
#
# Modified 2009/07/03	Benjamin Schuster-Boeckler (http://www.pearcomp.com)
#	Parser now based on XML::TreeBuilder: The previous regex didn't work any more
#	cleaned up the rest of the code a little bit
#	tried to fix caching: filename as date didn't work for me

package itunesRank;

use warnings;
use strict;
use XML::TreeBuilder;
use Getopt::Std;

# Command-line handling.
#   -k NAME   category name; must be a key of %categories (default "Top Overall")
#   -c NAME   country name; must be a key of %countryIDs, or "world" for all
#             stores (default "world")
#   -f        look at the free chart instead of the paid one
#   appID     numeric iTunes application ID (required)
my $usage = "Usage: $0 [-k category -c country -f (free?)] appID\n";

my %args;
getopts('c:k:f', \%args) or die $usage;

my $appID = shift or die $usage;

# The ID is compared numerically against IDs scraped from the chart, so
# reject anything that is not a plain integer up front.
$appID =~ /^\d+$/ or die "appID must be numeric, got \"$appID\"\n$usage";

my $categoryName = $args{k} || "Top Overall";
my $country = $args{c} || "world";

# -f is a boolean switch: Getopt::Std stores 1 for it, not the word "free".
# Map it to the 'free'/'paid' strings that the rest of the script tests.
my $freePaid = $args{f} ? "free" : "paid";

# Category name (as accepted by -k) => iTunes chart ID, passed as the "id"
# query parameter of the viewTop request.
my %categories = (
	"Top Overall"          => 25204,
	"Books"                => 25470,
	"Business"             => 25148,
	"Education"            => 25156,
	"Entertainment"        => 25164,
	"Finance"              => 25172,
	"Healthcare & Fitness" => 25188,
	"Lifestyle"            => 25196,
	"Medical"              => 26321,
	"Music"                => 25212,
	"Navigation"           => 25220,
	"News"                 => 25228,
	"Photography"          => 25236,
	"Productivity"         => 25244,
	"Reference"            => 25252,
	"Social Networking"    => 25260,
	"Sports"               => 25268,
	"Travel"               => 25276,
	"Utilities"            => 25284,
	"Weather"              => 25292,
	"All Games"            => 25180,
	"Games/Action"         => 26341,
	"Games/Adventure"      => 26351,
	"Games/Arcade"         => 26361,
	"Games/Board"          => 26371,
	"Games/Card"           => 26381,
	# Casino and Dice used to repeat the Games/Action ID (26341), so they
	# reported the Action chart. The game sub-genre IDs step by 10, and
	# 26391/26401 are the two values missing between Card and Educational.
	# NOTE(review): inferred from the sequence - confirm against the store.
	"Games/Casino"         => 26391,
	"Games/Dice"           => 26401,
	"Games/Educational"    => 26411,
	"Games/Family"         => 26421,
	"Games/Kids"           => 26431,
	"Games/Music"          => 26441,
	"Games/Puzzle"         => 26451,
	"Games/Racing"         => 26461,
	"Games/Role Playing"   => 26471,
	"Games/Simulation"     => 26481,
	"Games/Sports"         => 26491,
	"Games/Strategy"       => 26501,
	"Games/Trivia"         => 26511,
	"Games/Word"           => 26521,
);

# Country name => iTunes store-front ID.
# The keys are the exact strings accepted by the -c option (looked up
# verbatim, so spelling and capitalisation must match). The value is sent
# to the store in the "X-Apple-Store-Front" request header. When the country
# is "world", every entry of this table is queried in turn.
my %countryIDs = (
	"United States" => 143441,
	"Argentina" => 143505,
	"Australia" => 143460,
	"Belgium" => 143446,
	"Brazil" => 143503,
	"Canada" => 143455,
	"Chile" => 143483,
	"China" => 143465,
	"Colombia" => 143501,
	"Costa Rica" => 143495,
	"Croatia" => 143494,
	"Czech Republic" => 143489,
	"Denmark" => 143458,
	"Deutschland" => 143443,
	"El Salvador" => 143506,
	"Espana" => 143454,
	"Finland" => 143447,
	"France" => 143442,
	"Greece" => 143448,
	"Guatemala" => 143504,
	"Hong Kong" => 143463,
	"Hungary" => 143482,
	"India" => 143467,
	"Indonesia" => 143476,
	"Ireland" => 143449,
	"Israel" => 143491,
	"Italia" => 143450,
	"Korea" => 143466,
	"Kuwait" => 143493,
	"Lebanon" => 143497,
	"Luxembourg" => 143451,
	"Malaysia" => 143473,
	"Mexico" => 143468,
	"Nederland" => 143452,
	"New Zealand" => 143461,
	"Norway" => 143457,
	"Osterreich" => 143445,
	"Pakistan" => 143477,
	"Panama" => 143485,
	"Peru" => 143507,
	"Phillipines" => 143474,
	"Poland" => 143478,
	"Portugal" => 143453,
	"Qatar" => 143498,
	"Romania" => 143487,
	"Russia" => 143469,
	"Saudi Arabia" => 143479,
	"Schweitz/Suisse" => 143459,
	"Singapore" => 143464,
	"Slovakia" => 143496,
	"Slovenia" => 143499,
	"South Africa" => 143472,
	"Sri Lanka" => 143486,
	"Sweden" => 143456,
	"Taiwan" => 143470,
	"Thailand" => 143475,
	"Turkey" => 143480,
	"United Arab Emirates" => 143481,
	"United Kingdom" => 143444,
	"Venezuela" => 143502,
	"Vietnam" => 143471,
	"Japan" => 143462
);

#print "$appID, $categoryName, $freePaid\n";


# Main dispatch. The category and country names are checked against the
# lookup tables first: an unknown name would otherwise produce an undefined
# ID and a malformed request to the store.
die "Unknown category \"$categoryName\". Valid categories:\n  "
	. join("\n  ", sort keys %categories) . "\n"
	unless exists $categories{$categoryName};

if ($country eq "world") {
	# Print "<country>: <rank>" for every known store.
	getAppRankingInCategoryForWorld($appID, $categories{$categoryName});
} else {
	die "Unknown country \"$country\". Use \"world\" or one of:\n  "
		. join("\n  ", sort keys %countryIDs) . "\n"
		unless exists $countryIDs{$country};

	# Print just the rank (or "n/a") for the one requested store.
	fetchAppCategoryRankForCountry($appID, $categories{$categoryName}, $country, $countryIDs{$country});
}


#
# Subroutines
#

# Print one "<country>: <rank>" line for every store listed in %countryIDs.
#
#   $appID       numeric application ID to look for
#   $categoryID  chart ID (a value of %categories)
#
# Countries are visited in sorted name order so the output is stable from
# run to run; iterating the hash with each() gave a different order each
# time and left iterator state behind on the shared hash.
sub getAppRankingInCategoryForWorld {
	my ($appID, $categoryID) = @_;
	foreach my $country (sort keys %countryIDs) {
		print "$country: ";
		fetchAppCategoryRankForCountry($appID, $categoryID, $country, $countryIDs{$country});
	}
}

# Print the rank of one application in a single store's chart.
#
#   $myAppID     application ID to look for (compared numerically)
#   $categoryID  chart ID handed to fetchTop100InCategory
#   $country     country name (accepted for the caller's convenience; not
#                used in here)
#   $storeID     store-front ID handed to fetchTop100InCategory
#
# Writes the rank followed by a newline to STDOUT, or "n/a" when the
# application does not appear in the fetched chart.
sub fetchAppCategoryRankForCountry {
	my ($myAppID, $categoryID, $country, $storeID) = @_;

	my $matchedRank;
	for my $entry (fetchTop100InCategory($storeID, $categoryID)) {
		# Skip entries until the requested application turns up.
		next if $entry->{id} != $myAppID;
		$matchedRank = $entry->{rank};
		last;
	}

	my $line = defined($matchedRank) ? "$matchedRank\n" : "n/a\n";
	print $line;
}

# Fetch one store's chart for a category and return its entries.
#
#   $storeID     store-front ID, sent in the X-Apple-Store-Front header
#   $categoryID  chart ID, sent as the "id" query parameter
#
# Reads the file-level $freePaid ('free' or 'paid') to choose the chart.
#
# Returns a list of hash refs { rank => N, id => APPID } in document order,
# or an empty list when no document could be obtained or parsed (a warning
# is written to STDERR in that case).
#
# The downloaded document is cached in /tmp, one file per
# category/store/chart. When a cache file exists, curl is asked for the
# document only if it is newer than that file; an empty reply from curl
# means "use the cached copy".
sub fetchTop100InCategory {
	my ($storeID, $categoryID) = @_;

	# popId selects the chart: 30 == paid, 27 == free
	my $popId = ($freePaid eq 'free') ? 27 : 30;

	my $filename = "/tmp/cat-$categoryID-$storeID-$freePaid.html";
	my $url = "http://itunes.apple.com/WebObjects/MZStore.woa/wa/viewTop?id=$categoryID&popId=$popId";

	my @curl = (
		'curl', '-s',
		'-A', 'iTunes/4.2 (Macintosh; U; PPC Mac OS X 10.2',
		'-H', "X-Apple-Store-Front: $storeID-1",
	);
	# Given a file name, curl -z uses that file's modification time as the
	# time condition. This replaces a hand-formatted timestamp that was
	# split by the shell at its space and used a 0-based month. An empty
	# cache file is ignored so it can never be "not modified".
	push @curl, '-z', $filename if -s $filename;
	push @curl, $url;

	# Run curl without a shell (list-form pipe open), so no argument needs
	# shell quoting.
	my $doc = '';
	if (open(my $pipe, '-|', @curl)) {
		local $/;
		$doc = <$pipe>;
		$doc = '' unless defined $doc;
		close($pipe);
	} else {
		warn "Cannot run curl: $!\n";
	}

	my $fresh = ($doc ne '');
	if (!$fresh) {
		# Nothing came back: either the cached copy is still current or the
		# fetch failed. Fall back to the cache if there is one.
		my $in;
		if (!open($in, '<', $filename)) {
			warn "No data for store $storeID, category $categoryID: cannot read $filename: $!\n";
			return;
		}
		local $/;
		$doc = <$in>;
		close($in);
		return unless defined $doc && $doc ne '';
	}

	# The parser may die on a malformed document (e.g. an error page);
	# treat that as "no chart" instead of aborting the whole run.
	my $parser = XML::TreeBuilder->new;
	if (!eval { $parser->parse($doc); 1 }) {
		warn "Cannot parse chart for store $storeID, category $categoryID: $@";
		$parser->delete;
		return;
	}

	# Cache only documents that parsed, so a bad response is never reused.
	if ($fresh) {
		if (open(my $out, '>', $filename)) {
			print {$out} $doc;
			close($out) or warn "Cannot finish writing $filename: $!\n";
		} else {
			warn "Cannot write cache file $filename: $!\n";
		}
	}

	my @top100;

	my $dom = $parser->root();

	my @nodes = $dom->find_by_tag_name('HBoxView');
	@nodes = @nodes[3..$#nodes]; # the first 3 HBoxViews need to be skipped

	foreach my $node (@nodes) {
		# The application ID is taken from the first GotoURL in this box
		# that points at a viewSoftware page.
		my $appID;
		foreach my $url ($node->find_by_tag_name("GotoURL")) {
			my $href = $url->attr('url');
			next unless defined $href;
			if ($href =~ m#http://ax.itunes.apple.com/WebObjects/MZStore.woa/wa/viewSoftware\?id=(\d+)&mt=#) {
				$appID = $1;
				last;
			}
		}
		next unless $appID;

		# The rank is the text of a matrixTextFontStyle element that looks
		# like "12." once surrounding whitespace is trimmed.
		foreach my $txt ($node->find_by_tag_name('SetFontStyle')) {
			my $style = $txt->attr('normalStyle');
			next unless defined($style) && $style eq 'matrixTextFontStyle';
			my $text = $txt->as_text();
			$text =~ s/^\s*(\S.*\S)\s*$/$1/;
			$text =~ s/\n/ /g;
			if ($text =~ /^(\d+)\.$/) {
				push @top100, {rank => $1, id => $appID};
			}
		}
	}

	# Release the element tree explicitly; in "world" mode one tree is
	# built per store.
	$parser->delete;

	return @top100;
}