This is a Perl script that converts a Yahoo! Store XML feed into a Froogle data feed and uploads the resulting file to Froogle's server via FTP.
Instead of using this old script, there is a completely automatic solution for you: Froogle Data Feed Service for Yahoo Stores by Aten Software LLC. Get your store listed in Froogle within minutes! Also offered: Shopping.com Data Feed for Yahoo Stores.
Listed in Web Scripts Directory.
#!/usr/local/bin/perl
# yahoo_store_export_to_froogle.pl
#
# This is a Perl script to convert a Yahoo! Store's XML feed into a
# Froogle data feed, and FTP the file to Froogle's server.
# Note: Many stores are automatically indexed by Froogle.
#
# REQUIREMENTS
#   A merchant agreement/ftp account with Froogle.
#   A Yahoo! store account with store export enabled.
#
# INSTRUCTIONS
#   This script requires no command line parameters and creates no output.
#   It places the Froogle data feed into the system temporary directory.
#   Parameters for the store and Froogle user account
#   must be entered in the section labeled "CUSTOM INFORMATION".
#   The script can be tested against a saved Yahoo store's xml file
#   and alternate FTP server by setting the "DEBUG VARIABLES".
#
# FEATURES
#   Fully automated.  The Froogle data format is the simple format, not
#   extended format.  Entire store product library is processed.
#   Product code is taken as the Yahoo! store created abbreviation.
#   Image URLs and Section names are handled correctly.
#   All HTML tags are removed from product captions.
#   All tabs, carriage returns, and new lines are replaced with spaces.
#   Only products with a price are sent.  Orderable flag is ignored.
#
# LIMITATIONS
#   No special handling for books, music, dvd, etc.
#   No support for quantity pricing or alternate currency.
#   No support for item options.
#   No support for partial uploads.
#
# EXAMPLE CRONTAB ('crontab -e' to edit the crontab)
#]  # Send Yahoo Store data to Froogle daily at 6 am
#]  0 6 * * * /home/u/user/bin/yahoo_store_export_to_froogle.pl
#
# LINKS
#   Yahoo Store XML DTD
#     http://store.yahoo.com/lib/vw/StoreExport.dtd
#   Store export overview
#     http://store.yahoo.com/storexport.html
#   Froogle merchant info
#     http://froogle.google.com/froogle/merchants.html
#   CPAN Perl documentation
#     http://search.cpan.org/
#   UTF8 bug under red hat when warning enabled
#     http://archive.develooper.com/perl5-porters@perl.org/msg88085.html
#
# AUTHOR
#   Shailesh Humbad, March 21, 2003, http://www.somacon.com
#   This code is hereby granted to the public domain.

# LOAD MODULES
# NOTE(review): 'use warnings' is not enabled; the LINKS section above
# points at a UTF8 issue seen with warnings on.  Confirm before enabling.
use strict;
use XML::Parser;
use HTTP::Request;
use LWP::UserAgent;
use Net::FTP;

# DEBUG VARIABLES
# set debug to 1 to enable debug mode, otherwise set to 0
my $debug = 0;
# set to 1 to print every parsed product (all fields) to standard output
my $debug_dump_products = 0;
# enter file containing xml feed to use in debug mode
my $debug_xmlfilename = "storeexportdebug.xml";
# ftp server to use in debug mode
my $debug_username  = "";
my $debug_password  = "";
my $debug_ftpserver = "";

# CUSTOM INFORMATION
my $froogle_username  = "";
my $froogle_password  = "";
my $froogle_ftpserver = "";
my $store_xmlfeed_url = "http://store.yahoo.com/storename/objinfo.xml";
# set to "Windows" or "Linux"; needed to find temp directory
my $operating_system = "Linux";

# DECLARE VARIABLES
my $xmlparser;
my $temp_directory;
my $froogle_data_filename;
my $xmldata;                      # string containing yahoo store xml data
my $user_agent;
my @redirectable_methods = ();    # no methods should be redirectable
my $response;
my $fh_outfile;                   # file handle to the output file
my @productarray;                 # one hash ref per priced product
my %product;                      # product currently being parsed
my $ftp;

# Columns of the Froogle feed, in output order.  The names double as the
# keys of %product and as the header line of the data file.
my @froogle_columns = qw(
    product_url name description price image_url category code
);

# Maps the element path of a piece of character data to the %product field
# that collects it.  This table must be filled in before the parser runs,
# which is why it lives here and not next to handle_char at the end of the
# file.
my %field_for_context = (
    # froogle attributes
    "StoreExport Products Product Url"               => "product_url",
    "StoreExport Products Product Description"       => "name",
    "StoreExport Products Product Caption"           => "description",
    "StoreExport Products Product Pricing BasePrice" => "price",
    "StoreExport Products Product Picture"           => "image_url",
    # meta attributes
    "StoreExport Products Product Orderable"         => "orderable",
    "StoreExport Products Product Path ProductRef"   => "category",
);

# DEFINE XML PARSER SUBROUTINES
$xmlparser = XML::Parser->new(
    Handlers => {
        Start => \&tag_start,
        End   => \&tag_end,
        Char  => \&handle_char,
    }
);

# INITIALIZE VARIABLES
if ($debug) {
    print "Yahoo Store Export to Froogle - Debug.\n";
}

# figure out the temp directory path
if ($operating_system eq "Windows") {
    $temp_directory = $ENV{TEMP} . "\\";
}
elsif ($operating_system eq "Linux") {
    $temp_directory = "/var/tmp/";
}
else {
    die "Unknown operating system setting: '$operating_system' "
      . "(expected \"Windows\" or \"Linux\")\n";
}

# set the filename for the temporary froogle data file
$froogle_data_filename = $temp_directory . $froogle_username . ".txt";
if ($debug) {
    print "Froogle data file name is: ";
    print $froogle_data_filename . "\n";
}

# RETRIEVE THE ENTIRE XML FEED INTO A STRING
if (!$debug) {
    # create a user agent
    $user_agent = LWP::UserAgent->new();
    # disable redirection
    $user_agent->requests_redirectable(\@redirectable_methods);
    # perform a get request
    $response = $user_agent->get($store_xmlfeed_url);
    # check the response
    die("Error while getting " . $response->request->uri
      . "\nStatus-Line: " . $response->status_line . "\nAborting")
      unless ($response->is_success);
    # copy the response data into a string
    $xmldata = $response->content;
}
else {
    # read xml data from a debug file
    open(my $fh_debug, '<', $debug_xmlfilename)
      or die "Error opening debug XML file: $debug_xmlfilename: $!";
    $xmldata = do { local $/; <$fh_debug> };
    close($fh_debug);
}

# RUN THE XML PARSER (PARSING IS DONE IN THE SUBROUTINES)
$xmlparser->parse($xmldata);

# WRITE THE PARSED DATA TO THE TEMP FILE
# The file is opened only after the feed has been fetched and parsed, so a
# failed download or parse does not truncate a previously written file.
open($fh_outfile, '>', $froogle_data_filename)
  or die "Error opening file for write: $froogle_data_filename: $!";

# print column names
print {$fh_outfile} join("\t", @froogle_columns) . "\n";

for my $item (@productarray) {
    my @cells = map { defined $item->{$_} ? $item->{$_} : "" } @froogle_columns;
    print {$fh_outfile} join("\t", @cells) . "\n";
}

# debug print the parsed data
if ($debug_dump_products) {
    for my $item (@productarray) {
        print "\n\n";
        for my $key (keys %$item) {
            print "'$key' => '$item->{$key}' \n";
        }
    }
}

# close the output file; buffered write errors only show up here
close($fh_outfile)
  or die "Error closing file: $froogle_data_filename: $!";

if ($debug) {
    $froogle_ftpserver = $debug_ftpserver;
    $froogle_username  = $debug_username;
    $froogle_password  = $debug_password;
}

# UPLOAD THE FROOGLE FORMAT DATA TO FROOGLE
$ftp = Net::FTP->new($froogle_ftpserver, Timeout => 30)
  or die "Could not connect to FTP server: $froogle_ftpserver.\n";
$ftp->login($froogle_username, $froogle_password)
  or die "Could not log in to FTP server.\n";
$ftp->put($froogle_data_filename)
  or die "Could not upload $froogle_data_filename: " . $ftp->message;
$ftp->quit();

# END OF SCRIPT

# --------- XML ROUTINES ---------

# XML TAG START ROUTINE
# Start (Parser, Element [, Attr, Val [,...]])
#
# When a <Product> element opens, resets %product to empty fields and
# records the element's Id attribute as the product code.  All other
# elements are ignored.
sub tag_start {
    my ($expat_instance, $tagvalue, %attribute) = @_;

    return unless $tagvalue eq "Product";

    # Reset every field, including the code, so nothing leaks over from
    # the previous product when this one has no Id attribute.
    %product = (
        product_url => "",
        name        => "",
        description => "",
        price       => "",
        image_url   => "",
        orderable   => "",
        category    => "",
        code        => defined $attribute{Id} ? $attribute{Id} : "",
    );
    return;
}

# XML CHARACTER DATA ROUTINE
# Char (Parser, String)
#
# Appends the text to the %product field selected by the current element
# path (see %field_for_context).  Text is appended, never assigned: the
# parser may deliver one run of text in several calls, and a category is
# built up from several ProductRef elements.
sub handle_char {
    my ($expat_instance, $tagvalue) = @_;

    my $context = join " ", $expat_instance->context;
    my $field   = $field_for_context{$context};

    $product{$field} .= $tagvalue if defined $field;
    return;
}

# XML TAG END ROUTINE
# End (Parser, Element)
#
# On </ProductRef>, appends the " > " section separator to the category.
# On </Product>, cleans up the collected fields and, if the product has a
# price, pushes a copy of it onto @productarray.
sub tag_end {
    my ($expat_instance, $tagvalue) = @_;

    # add separator for each ProductRef to create section name
    if ($tagvalue eq "ProductRef") {
        $product{category} .= " > ";
        return;
    }

    return unless $tagvalue eq "Product";

    # CLEAN UP THE PRODUCT DATA

    # trim off the trailing separator for the category
    $product{category} =~ s/ > \z//;

    # extract the URL for the image: the text between "src=" and the
    # next ">".  $1 is read only when the match succeeded, so a stale
    # capture from an earlier match can never be picked up.
    if ($product{image_url} =~ /src=(.*?)>/i and length $1) {
        $product{image_url} = $1;
    }

    # replace all cr, lf, and tab with spaces in all fields of the hash
    for my $field (keys %product) {
        next unless defined $product{$field};
        $product{$field} =~ tr/\t\r\n/ /;
    }

    # replace all html tags with empty string
    $product{description} =~ s/<.*?>//g;

    # PUSH THE PRODUCT
    # do not push products without a price;
    # these are either sections or unpriced items
    if ($product{price}) {
        push @productarray, {%product};
    }
    return;
}