Somacon.com: Articles on websites & etc.

§ Home > Index > Freeware

Free Google Product Search Submission Script for Yahoo Stores

This is a Perl script that converts a Yahoo! Store XML feed into a Google Base/Merchant Center data feed and uploads the resulting file to Google Base’s server via FTP.

Instead of using this old script, there is a completely automatic solution for you: Yahoo Stores Data Feed to Google Merchant Center and More by Aten Software LLC. Get your store listed in Google Product Search within minutes! Now offering Yahoo data feeds to any shopping engine, such as Shopping.com, Shopzilla, NexTag, Bing Shopping, TheFind, Amazon Product Ads, and more.

#!/usr/local/bin/perl

# yahoo_store_export_to_froogle.pl
#
# This is a Perl script to convert Yahoo! Stores XML feed into
#   Froogle data feed, and FTP the file to Froogle's server.
# Note: Many stores are automatically indexed by Froogle.
#
# REQUIREMENTS
#  A merchant agreement/ftp account with Froogle.
#  A Yahoo! store account with store export enabled.
#
# INSTRUCTIONS
#   This script requires no command line parameters and creates no output.
#   It places the Froogle data feed into the system temporary directory.
#   Parameters for the store and Froogle user account
#     must be entered in the section labeled "CUSTOM INFORMATION".
#   The script can be tested against a saved Yahoo store's xml file
#     and alternate FTP server by setting the "DEBUG VARIABLES".
#
# FEATURES
#  Fully automated. The Froogle data format is the simple format, not
#  extended format.  Entire store product library is processed.
#  Product code is taken as the Yahoo! store created abbreviation.
#  Image URLs and Section names are handled correctly.
#  All HTML tags are removed from product captions.
#  All tabs, carriage returns, and new lines are replaced with spaces.
#  Only products with a price are sent. Orderable flag is ignored.
#
# LIMITATIONS
#  No special handling for books, music, dvd, etc.
#  No support for quantity pricing or alternate currency.
#  No support for item options.
#  No support for partial uploads.
#
# EXAMPLE CRONTAB ('crontab -e' to edit the crontab)
#] # Send Yahoo Store data to Froogle daily at 6 am
#] 0 6 * * * /home/u/user/bin/yahoo_store_export_to_froogle.pl
#
# LINKS
# Yahoo Store XML DTD
#    http://store.yahoo.com/lib/vw/StoreExport.dtd
# Store export overview
#    http://store.yahoo.com/storexport.html
# Froogle merchant info
#    http://froogle.google.com/froogle/merchants.html
# CPAN Perl documentation
# http://search.cpan.org/
# UTF8 bug under red hat when warning enabled
# http://archive.develooper.com/perl5-porters@perl.org/msg88085.html
#
# AUTHOR
#   Shailesh Humbad, March 21, 2003, https://www.somacon.com/
#   This code is hereby granted to the public domain.

# LOAD MODULES
use XML::Parser;
use HTTP::Request;
use LWP::UserAgent;
use Net::FTP;
use strict;

# DEBUG VARIABLES
# Debug switch: 1 = read the feed from a local file and upload to the
# debug FTP account below; 0 = fetch the live feed and upload to Froogle.
my $debug = 0;
# saved copy of the store's xml feed, read instead of the URL in debug mode
my $debug_xmlfilename = 'storeexportdebug.xml';
# FTP account that replaces the Froogle account in debug mode
my $debug_username  = '';
my $debug_password  = '';
my $debug_ftpserver = '';


# CUSTOM INFORMATION
# Froogle FTP account the data feed is uploaded to
my $froogle_username  = '';
my $froogle_password  = '';
my $froogle_ftpserver = '';
# URL of the Yahoo! store's XML export
my $store_xmlfeed_url = 'http://store.yahoo.com/storename/objinfo.xml';
# must be "Windows" or "Linux"; selects the temp directory location
my $operating_system = 'Linux';

# DECLARE VARIABLES
my $xmlparser;               # XML::Parser instance
my $temp_directory;          # directory the data feed file is written to
my $froogle_data_filename;   # full path of the generated data feed file
my $xmldata;                 # string containing yahoo store xml data
my $user_agent;              # LWP::UserAgent used to download the feed
my @redirectable_methods;    # stays empty: no methods should be redirectable
my $response;                # HTTP response for the feed request
my $fh_outfile;              # file handle to the output file
my $filedata;                # read buffer for the debug xml file
my @productarray;            # one hash reference per priced product
my %product;                 # fields of the product currently being parsed
my $key;                     # loop variable for hash keys
my $hashref;                 # loop variable for entries of @productarray
my $ftp;                     # Net::FTP connection
# DEFINE XML PARSER SUBROUTINES
# Create the event-driven parser and register the three handler subs
# defined at the bottom of this file.  The direct method call
# XML::Parser->new(...) is used rather than the indirect-object form
# "new XML::Parser(...)", which Perl can parse ambiguously.
$xmlparser = XML::Parser->new(
    Handlers => {
        Start => \&tag_start,
        End   => \&tag_end,
        Char  => \&handle_char,
    },
);

# INITIALIZE VARIABLES
if($debug) { print "Yahoo Store Export to Froogle - Debug.\n"; }

# figure out the temp directory path
if($operating_system eq "Windows")
{
    # without TEMP the file would silently land in the current directory
    defined $ENV{TEMP}
        or die ("Environment variable TEMP is not set; ".
        "cannot locate the temp directory.\n");
    $temp_directory = $ENV{TEMP}."\\";
}
elsif($operating_system eq "Linux")
{
    $temp_directory = "/var/tmp/";
}
else
{
    # any other value would leave $temp_directory undefined
    die ("Unsupported \$operating_system setting '".$operating_system.
        "'; it must be \"Windows\" or \"Linux\".\n");
}

# set the filename for the temporary froogle data file
$froogle_data_filename = $temp_directory.$froogle_username.".txt";

if($debug)
{
    print "Froogle data file name is: ";
    print $froogle_data_filename."\n";
}

# open the file for output
# (three-argument open: the file name can never be mistaken for a mode;
#  the handle name FH_OUTFILE is kept because later sections print to it)
open(FH_OUTFILE, ">", $froogle_data_filename)
    or die ("Error opening file for write: ".
    $froogle_data_filename.": ".$!);


# print column names
print FH_OUTFILE 
    "product_url\tname\tdescription\tprice\timage_url\tcategory\tcode\n";

# RETRIEVE THE ENTIRE XML FEED INTO A STRING
# Live mode downloads the feed over HTTP; debug mode reads a saved copy
# from $debug_xmlfilename.  Either way the raw XML ends up in $xmldata.
if (!$debug)
{
    # create a user agent
    $user_agent = LWP::UserAgent->new();
    # disable redirection (the list of redirectable methods is empty)
    $user_agent->requests_redirectable(\@redirectable_methods);
    # perform a get request
    $response = $user_agent->get($store_xmlfeed_url);
    # check the response
    die ("Error while getting ".$response->request->uri.
        "\nStatus-Line: ".$response->status_line."\nAborting")
      unless ($response->is_success);

    # copy the response data into a string
    $xmldata = $response->content;
}
else
{
    # read xml data from a debug file; fail loudly if it cannot be opened
    # instead of handing an empty document to the parser
    open(my $debug_fh, "<", $debug_xmlfilename)
        or die ("Error opening debug xml file: ".
        $debug_xmlfilename.": ".$!);

    # slurp the whole file in one read ($/ is undefined only in this block)
    $xmldata = do { local $/; <$debug_fh> };

    close($debug_fh);
}

# RUN THE XML PARSER (PARSING IS DONE IN THE SUBROUTINES)
# The handlers fill @productarray as a side effect of this call.
$xmlparser->parse($xmldata);


# WRITE THE PARSED DATA TO THE TEMP FILE
# One tab-separated line per product, with the fields in the same order
# as the column names printed at the top of the file.
for my $row (@productarray)
{
    print FH_OUTFILE
        join("\t", @{$row}{qw(product_url name description price
                              image_url category code)}),
        "\n";
}

# debug print the parsed data
# (permanently disabled by the constant condition; change 0 to 1 to dump
#  every key/value pair of every parsed product to standard output)
if (0)
{
    foreach my $record (@productarray)
    {
        print "\n\n";
        print "'$_' => '$record->{$_}' \n" for keys %$record;
    }
}

# close the output file
# (checked: buffered write errors such as a full disk only surface here,
#  and an incomplete feed must not be uploaded)
close (FH_OUTFILE)
    or die ("Error closing file: ".$froogle_data_filename.": ".$!);

# in debug mode, upload to the debug FTP account instead of Froogle
if($debug)
{
    $froogle_ftpserver = $debug_ftpserver;
    $froogle_username = $debug_username;
    $froogle_password = $debug_password;
}

# UPLOAD THE FROOGLE FORMAT DATA TO FROOGLE
$ftp = Net::FTP->new
(
    $froogle_ftpserver,
    Timeout => 30
) or die "Could not connect to FTP server: $froogle_ftpserver.\n";

$ftp->login($froogle_username, $froogle_password)
    or die "Could not log in to FTP server.\n";

# put() returns false when the transfer fails; report the server's reply
$ftp->put($froogle_data_filename)
    or die "Could not upload $froogle_data_filename: ".$ftp->message;

$ftp->quit();

# END OF SCRIPT

# --------- XML ROUTINES ---------

# XML TAG START ROUTINE
# Start (Parser, Element [, Attr, Val [,...]])
#
# Registered as the parser's Start handler.  When a <Product> element
# opens, every field of the shared %product hash is cleared and the
# element's Id attribute, if present, is stored as the product code.
# All other elements are ignored.
#
# Parameters:
#   $expat_instance - the parser object (unused here)
#   $tagvalue       - name of the element being opened
#   @attributes     - flat list of attribute name/value pairs
sub tag_start {
    my ($expat_instance, $tagvalue, @attributes) = @_;

    return unless $tagvalue eq "Product";

    # reset the product data; code is cleared as well so that a product
    # without an Id cannot inherit the code of the previous product
    $product{$_} = ""
        for qw(product_url name description price
               image_url orderable category code);

    # get product code as the Id of the product tag
    while(@attributes)
    {
        # take one attr,val pair off the list
        my ($attr_name, $attr_value) = splice(@attributes, 0, 2);

        if($attr_name eq "Id")
        {
            # save the product code and stop looking
            $product{code} = $attr_value;
            last;
        }
    }

    return;
}

# XML CHARACTER DATA ROUTINE
#
# Registered as the parser's Char handler.  The text is appended to the
# %product field that corresponds to the element the parser is currently
# inside; text found anywhere else is dropped.  Appending (rather than
# assigning) matters because one element's text can be delivered in
# several pieces, for example around entities such as &gt; or &amp;.
#
# Parameters:
#   $expat_instance - the parser object, used to obtain the element context
#   $tagvalue       - the piece of character data
sub handle_char {
    my ($expat_instance, $tagvalue) = @_;

    # element path (outermost first, space separated) => %product field
    my %field_for = (
        # froogle attributes
        "StoreExport Products Product Url"               => "product_url",
        "StoreExport Products Product Description"       => "name",
        "StoreExport Products Product Caption"           => "description",
        "StoreExport Products Product Pricing BasePrice" => "price",
        "StoreExport Products Product Picture"           => "image_url",
        # meta attributes
        "StoreExport Products Product Orderable"         => "orderable",
        # successive ProductRef names build up the category name
        "StoreExport Products Product Path ProductRef"   => "category",
    );

    # where in the document the text was found
    my $element_path = join " ", $expat_instance->context;

    my $field = $field_for{$element_path};
    if (defined $field)
    {
        $product{$field} .= $tagvalue;
    }
}

# XML TAG END ROUTINE
#
# Registered as the parser's End handler.
#  - On </Product>: cleans up the fields collected in the shared %product
#    hash and, if the product has a price, appends a copy of it to
#    @productarray.
#  - On </ProductRef>: appends the " > " separator to the category so
#    that successive ProductRef names form a section path.
#
# Parameters:
#   $expat_instance - the parser object (unused here)
#   $tagvalue       - name of the element being closed
sub tag_end {
    my ($expat_instance, $tagvalue) = @_;

    if($tagvalue eq "Product")
    {
        # CLEAN UP THE PRODUCT DATA

        # trim off the trailing separator for the category
        # (only removed when it is really there, instead of blindly
        #  cutting the last three characters)
        $product{category} =~ s/ > \z//;

        # extract the URL for the image: everything between "src=" and
        # the next ">".  $1 is only read after a successful match; read
        # after a failed match it could still hold a capture left over
        # from an earlier, unrelated match.
        if($product{image_url} =~ /src=(.*?)>/i and $1)
        {
            $product{image_url} = $1;
        }

        # replace all cr, lf, and tab with spaces
        # in all fields of the hash
        foreach my $key (keys %product)
        {
            if($product{$key})
            {
                $product{$key} =~ tr/\t\r\n/   /;
            }
        }

        # replace all html tags with empty string; this runs after the
        # newline replacement so that tags spanning lines are removed too
        $product{description} =~ s/<.*?>//g;

        # PUSH THE PREVIOUS PRODUCT
        if($product{price})
        {
            # do not push products without a price
            # these are either sections or unpriced items
            push @productarray, {
                code => $product{code},
                name => $product{name},
                description => $product{description},
                product_url => $product{product_url},
                image_url => $product{image_url},
                price => $product{price},
                orderable => $product{orderable},
                category => $product{category},
            };
        }
    }

    # add separator for each ProductRef to create section name
    if($tagvalue eq "ProductRef")
    {
        $product{category} .= " > ";
    }

    return;
}

Have you heard of the new, free Automated Feeds offered by Google Merchant Center? Learn more in Aten Software's latest blog post comparing them to traditional data feed files.
Created 2005-04-24, Last Modified 2018-01-25, © Shailesh N. Humbad
Disclaimer: This content is provided as-is. The information may be incorrect.