Somacon.com: Articles on web development, software, and hardware
§ Home > Index > Freeware

Free Froogle Feed Submission Script for Yahoo Stores

This is a Perl script to convert Yahoo! Stores XML feed into Froogle data feed, and FTP the file to Froogle's server.

Instead of using this old script, there is a completely automatic solution for you: Froogle Data Feed Service for Yahoo Stores by Aten Software LLC. Get your store listed in Froogle within minutes! Also offered: Shopping.com Data Feed for Yahoo Stores.

Listed in Web Scripts Directory.

#!/usr/local/bin/perl

# yahoo_store_export_to_froogle.pl
#
# This is a Perl script to convert Yahoo! Stores XML feed into
#   Froogle data feed, and FTP the file to Froogle's server.
# Note: Many stores are automatically indexed by Froogle.
#
# REQUIREMENTS
#  A merchant agreement/ftp account with Froogle.
#  A Yahoo! store account with store export enabled.
#
# INSTRUCTIONS
#   This script requires no command line parameters and creates no output.
#   It places the Froogle data feed into the system temporary directory.
#   Parameters for the store and Froogle user account
#     must be entered in the section labeled "CUSTOM INFORMATION".
#   The script can be tested against a saved Yahoo store's xml file
#     and alternate FTP server by setting the "DEBUG VARIABLES".
#
# FEATURES
#  Fully automated. The Froogle data format is the simple format, not
#  extended format.  Entire store product library is processed.
#  Product code is taken as the Yahoo! store created abbreviation.
#  Image URLs and Section names are handled correctly.
#  All HTML tags are removed from product captions.
#  All tabs, carraige returns, and new lines are replaced with spaces.
#  Only products with a price are sent. Orderable flag is ignored.
#
# LIMITATIONS
#  No special handling for books, music, dvd, etc.
#  No support for quantity pricing or alternate currency.
#  No support for item options.
#  No support for partial uploads.
#
# EXAMPLE CRONTAB ('crontab -e' to edit the crontab)
#] # Send Yahoo Store data to Froogle daily at 6 am
#] 0 6 * * * /home/u/user/bin/yahoo_store_export_to_froogle.pl
#
# LINKS
# Yahoo Store XML DTD
#    http://store.yahoo.com/lib/vw/StoreExport.dtd
# Store export overview
#    http://store.yahoo.com/storexport.html
# Froogle merchant info
#    http://froogle.google.com/froogle/merchants.html
# CPAN Perl documentation
# http://search.cpan.org/
# UTF8 bug under red hat when warning enabled
# http://archive.develooper.com/perl5-porters@perl.org/msg88085.html
#
# AUTHOR
#   Shailesh Humbad, March 21, 2003, http://www.somacon.com
#   This code is hereby granted to the public domain.

# LOAD MODULES
use XML::Parser;
use HTTP::Request;
use LWP::UserAgent;
use Net::FTP;
use strict;

# DEBUG VARIABLES
# set debug to 1 to enable debug mode, otherwise set to 0
my $debug = 0;
# enter file containing xml feed to use in debug mode
my $debug_xmlfilename = "storeexportdebug.xml";
# ftp server to use in debug mode
my $debug_username = "";
my $debug_password = "";
my $debug_ftpserver = "";


# CUSTOM INFORMATION
my $froogle_username = "";
my $froogle_password = "";
my $froogle_ftpserver = "";
my $store_xmlfeed_url = "http://store.yahoo.com/storename/objinfo.xml";
# set to "Windows" or "Linux"; needed to find temp directory
my $operating_system = "Linux";

# DECLARE VARIABLES
my $xmlparser;
my $temp_directory;
my $froogle_data_filename;
my $xmldata; # string containing yahoo store xml data
my $user_agent;
my @redirectable_methods = (); # no methods should be redirectable
my $response;
my $fh_outfile; # file handle to the output file
my $filedata;
my @productarray;
my %product;
my $key;
my $hashref;
my $ftp;

# DEFINE XML PARSER SUBROUTINES
$xmlparser = new XML::Parser(Handlers => {
    Start => \&tag_start,
    End => \&tag_end,
    Char => \&handle_char });

# INITIALIZE VARIABLES
if($debug) { print "Yahoo Store Export to Froogle - Debug.\n"; }

# figure out the temp directory path
if($operating_system eq "Windows")
{
    $temp_directory = $ENV{TEMP}."\\";
}
if($operating_system eq "Linux")
{
    $temp_directory = "/var/tmp/";
}

# set the filename for the temporary froogle data file
$froogle_data_filename = $temp_directory.$froogle_username.".txt";

if($debug)
{
    print "Froogle data file name is: ";
    print $froogle_data_filename."\n";
}

# open the file for output
open FH_OUTFILE, ">".$froogle_data_filename
    or die ("Error opening file for write: ".
    $froogle_data_filename);


# print column names
print FH_OUTFILE 
    "product_url\tname\tdescription\tprice\timage_url\tcategory\tcode\n";

# RETRIEVE THE ENTIRE XML FEED INTO A STRING
if (!$debug)
{
    # create a user agent
    $user_agent = LWP::UserAgent->new();
    # disable redirection
    $user_agent->requests_redirectable(\@redirectable_methods);
    # perform a get request
    $response = $user_agent->get($store_xmlfeed_url);
    # check the response
    die ("Error while getting ".$response->request->uri.
        "\nStatus-Line: ".$response->status_line."\nAborting")
      unless ($response->is_success);

    # copy the response data into a string
    $xmldata = $response->content;
}
else
{
    # read xml data from a debug file
    open DEBUGXMLFILE, "<".$debug_xmlfilename;
    while(read(DEBUGXMLFILE, $filedata, 10000))
    {
        $xmldata .= $filedata;
    }
    
    close DEBUGXMLFILE;
}

# RUN THE XML PARSER (PARSING IS DONE IN THE SUBROUTINES)
$xmlparser->parse($xmldata);


# WRITE THE PARSED DATA TO THE TEMP FILE
for $hashref (@productarray)
{
    print FH_OUTFILE $hashref->{product_url}."\t";
    print FH_OUTFILE $hashref->{name}."\t";
    print FH_OUTFILE $hashref->{description}."\t";
    print FH_OUTFILE $hashref->{price}."\t";
    print FH_OUTFILE $hashref->{image_url}."\t";
    print FH_OUTFILE $hashref->{category}."\t";
    print FH_OUTFILE $hashref->{code};
    print FH_OUTFILE "\n";
}

# debug print the parsed data
if(0)
{
    for $hashref (@productarray)
    {
        print "\n\n";
        for $key (keys %$hashref)
        {
            print "'$key' => '$hashref->{$key}' \n";
        }
    }
}

# close the output file
close (FH_OUTFILE);

if($debug)
{
    $froogle_ftpserver = $debug_ftpserver;
    $froogle_username = $debug_username;
    $froogle_password = $debug_password;
}

# UPLOAD THE FROOGLE FORMAT DATA TO FROOGLE
$ftp = Net::FTP->new
(
    $froogle_ftpserver,
    Timeout => 30
) or die "Could not connect to FTP server: $froogle_ftpserver.\n";

$ftp->login($froogle_username, $froogle_password)
    or die "Could not log in to FTP server.\n";

$ftp->put($froogle_data_filename);

$ftp->quit();

# END OF SCRIPT

# --------- XML ROUTINES ---------

# XML TAG START ROUTINE
# Start (Parser, Element [, Attr, Val [,...]])
sub tag_start {
    # Retrieve passed in values
    my $expat_instance;
    my $tagvalue;
    
    $expat_instance = shift @_;
    $tagvalue = shift @_;

    if($tagvalue eq "Product")
    {
        # reset the product data
        $product{product_url} = "";
        $product{name} = "";
        $product{description} = "";
        $product{price} = "";
        $product{image_url} = "";
        $product{orderable} = "";
        $product{category} = "";
        
        # get product code as the Id of the product tag
        while(@_)
        {
            if($_[0] eq "Id")
            {
                # save the product code and break
                $product{code} = $_[1];
            }
            # shift an attr,val pair off the parameters
            shift @_;
            shift @_;
        }

    }
}

# XML CHARACTER DATA ROUTINE
sub handle_char {
    # Retrieve passed in values
    my ($expat_instance, $tagvalue) = @_;
    # Retrieve context
    my @context = $expat_instance->context;

    # convert the XML feed to froogle format
    # froogle attributes
    if ((join " ",@context) eq 
        "StoreExport Products Product Url")
    {
        $product{product_url} .= $tagvalue;
    }
    if ((join " ",@context) eq 
        "StoreExport Products Product Description")
    {
        $product{name} .= $tagvalue;
    }
    if ((join " ",@context) eq 
        "StoreExport Products Product Caption")
    {
        $product{description} .= $tagvalue;
    }
    if ((join " ",@context) eq 
        "StoreExport Products Product Pricing BasePrice")
    {
        $product{price} .= $tagvalue;
    }
    if ((join " ",@context) eq 
        "StoreExport Products Product Picture")
    {
        # must be concatenation due to &gt;
        $product{image_url} .= $tagvalue;
    }
    # meta attributes
    if ((join " ",@context) eq 
        "StoreExport Products Product Orderable")
    {
        $product{orderable} .= $tagvalue;
    }
    if ((join " ",@context) eq 
        "StoreExport Products Product Path ProductRef")
    {
        # must be concatenation to create category name
        # and to combine names with '&'
        $product{category} .= $tagvalue;
    }
}

# XML TAG END ROUTINE
sub tag_end {
    my $key;
    # Retrieve passed in values
    my ($expat_instance, $tagvalue) = @_;

    if($tagvalue eq "Product")
    {
        # CLEAN UP THE PRODUCT DATA
        
        # trim off the trailing separator for the category
        if(length($product{category}) > 0)
        {
            $product{category} = 
                substr($product{category}, 0, 
                    length($product{category})-3);
        }
        # extract the URL for the image
        $product{image_url} =~ /.*?src\=(.*?)\>/gi;
        if($1)
        {
            $product{image_url} = $1;
        }
        
        # replace all cr, lf, and tab with spaces
        # in all fields of the hash
        foreach $key (keys %product)
        {
            if($product{$key})
            {
                $product{$key} =~ tr/\t\r\n/   /;
            }
        }
        
        # replace all html tags with empty string
        $product{description} =~ s/\<.*?\>//gi;
    
        # PUSH THE PREVIOUS PRODUCT
        if($product{price})
        {
            # do not push products without a price
            # these are either sections or unpriced items
            push @productarray, {
                code => $product{code},
                name => $product{name},
                description => $product{description},
                product_url => $product{product_url},
                image_url => $product{image_url},
                price => $product{price},
                orderable => $product{orderable},
                category => $product{category},
            };
        }
    }
    
    # add separator for each ProductRef to create section name
    if($tagvalue eq "ProductRef")
    {
        $product{category} .= " > "
    }
            
}
Link to this page: <a href="http://www.somacon.com/p167.php">Free Froogle Feed Submission Script for Yahoo Stores</a>

Contact · Search · Print · Social bookmark this page · E-mail this page
Created 2005-04-24, Last Modified 2006-12-07, © Shailesh N. Humbad
Disclaimer: This content is provided as-is. The information may be incorrect.