Free Google Product Search Submission Script for Yahoo Stores
This is a Perl script that converts a Yahoo! Store XML feed into a Google Base/Merchant Center data feed and uploads the resulting file to Google Base’s server via FTP.
Instead of using this old script, there is a completely automatic solution for you: Yahoo Stores Data Feed to Google Merchant Center and More by Aten Software LLC. Get your store listed in Google Product Search within minutes! Now offering Yahoo data feeds to any shopping engine, such as Shopping.com, Shopzilla, NexTag, Bing Shopping, TheFind, Amazon Product Ads, and more.
#!/usr/local/bin/perl
# yahoo_store_export_to_froogle.pl
#
# This is a Perl script to convert Yahoo! Stores XML feed into
# Froogle data feed, and FTP the file to Froogle's server.
# Note: Many stores are automatically indexed by Froogle.
#
# REQUIREMENTS
#   A merchant agreement/ftp account with Froogle.
#   A Yahoo! store account with store export enabled.
#
# INSTRUCTIONS
#   This script requires no command line parameters and creates no output
#   (other than debug messages, warnings, and fatal errors).
#   It places the Froogle data feed into the system temporary directory.
#   Parameters for the store and Froogle user account
#   must be entered in the section labeled "CUSTOM INFORMATION".
#   The script can be tested against a saved Yahoo store's xml file
#   and alternate FTP server by setting the "DEBUG VARIABLES".
#
# FEATURES
#   Fully automated.  The Froogle data format is the simple format, not
#   extended format.  Entire store product library is processed.
#   Product code is taken as the Yahoo! store created abbreviation.
#   Image URLs and Section names are handled correctly.
#   All HTML tags are removed from product captions.
#   All tabs, carriage returns, and new lines are replaced with spaces.
#   Only products with a price are sent.  Orderable flag is ignored.
#   The data file is always written as UTF-8.
#
# LIMITATIONS
#   No special handling for books, music, dvd, etc.
#   No support for quantity pricing or alternate currency.
#   No support for item options.
#   No support for partial uploads.
#
# EXAMPLE CRONTAB ('crontab -e' to edit the crontab)
#] # Send Yahoo Store data to Froogle daily at 6 am
#] 0 6 * * * /home/u/user/bin/yahoo_store_export_to_froogle.pl
#
# LINKS
#   Yahoo Store XML DTD
#     http://store.yahoo.com/lib/vw/StoreExport.dtd
#   Store export overview
#     http://store.yahoo.com/storexport.html
#   Froogle merchant info
#     http://froogle.google.com/froogle/merchants.html
#   CPAN Perl documentation
#     http://search.cpan.org/
#   UTF8 bug under red hat when warning enabled
#     http://archive.develooper.com/perl5-porters@perl.org/msg88085.html
#
# AUTHOR
#   Shailesh Humbad, March 21, 2003, https://www.somacon.com/
#   This code is hereby granted to the public domain.

# LOAD MODULES
use strict;
use warnings;
use XML::Parser;
use HTTP::Request;
use LWP::UserAgent;
use Net::FTP;

# DEBUG VARIABLES
# set debug to 1 to enable debug mode, otherwise set to 0
my $debug = 0;
# enter file containing xml feed to use in debug mode
my $debug_xmlfilename = "storeexportdebug.xml";
# ftp server to use in debug mode
my $debug_username  = "";
my $debug_password  = "";
my $debug_ftpserver = "";

# CUSTOM INFORMATION
my $froogle_username  = "";
my $froogle_password  = "";
my $froogle_ftpserver = "";
my $store_xmlfeed_url = "http://store.yahoo.com/storename/objinfo.xml";
# set to "Windows" or "Linux"; needed to find temp directory
my $operating_system = "Linux";

# DECLARE VARIABLES
my @redirectable_methods = ();    # no methods should be redirectable
my @productarray;                 # one hash ref per priced product
my %product;                      # product currently being parsed

# Output columns, in the order they appear in the data file.
my @feed_columns = qw(product_url name description price image_url category code);

# Maps the element path reported by the parser (space separated, outermost
# element first) to the %product field that collects its character data.
# Character data can arrive in several chunks, so handlers always append.
my %field_for_context = (
    "StoreExport Products Product Url"               => "product_url",
    "StoreExport Products Product Description"       => "name",
    "StoreExport Products Product Caption"           => "description",
    "StoreExport Products Product Pricing BasePrice" => "price",
    "StoreExport Products Product Picture"           => "image_url",
    "StoreExport Products Product Orderable"         => "orderable",
    "StoreExport Products Product Path ProductRef"   => "category",
);

# DEFINE XML PARSER SUBROUTINES
my $xmlparser = XML::Parser->new(
    Handlers => {
        Start => \&tag_start,
        End   => \&tag_end,
        Char  => \&handle_char,
    },
);

# INITIALIZE VARIABLES
if ($debug) {
    print "Yahoo Store Export to Froogle - Debug.\n";
}

# figure out the temp directory path; refuse to continue when it cannot be
# determined instead of silently writing into the current directory
my $temp_directory;
if ($operating_system eq "Windows") {
    defined $ENV{TEMP}
        or die "TEMP environment variable is not set; cannot locate temp directory.\n";
    $temp_directory = $ENV{TEMP} . "\\";
}
elsif ($operating_system eq "Linux") {
    $temp_directory = "/var/tmp/";
}
else {
    die "Unknown operating system setting '$operating_system'; "
        . "expected \"Windows\" or \"Linux\".\n";
}

# set the filename for the temporary froogle data file
# (always based on the real Froogle user name, even in debug mode)
my $froogle_data_filename = $temp_directory . $froogle_username . ".txt";
if ($debug) {
    print "Froogle data file name is: ";
    print $froogle_data_filename . "\n";
}

# open the file for output; the parser hands back character strings, so an
# explicit UTF-8 layer keeps the whole file in one consistent encoding
open my $fh_outfile, '>:encoding(UTF-8)', $froogle_data_filename
    or die "Error opening file for write: $froogle_data_filename ($!)";

# print column names
print {$fh_outfile} join("\t", @feed_columns) . "\n";

# RETRIEVE THE ENTIRE XML FEED INTO A STRING
my $xmldata;    # string containing yahoo store xml data
if (!$debug) {
    # create a user agent
    my $user_agent = LWP::UserAgent->new();
    # disable redirection
    $user_agent->requests_redirectable(\@redirectable_methods);
    # perform a get request
    my $response = $user_agent->get($store_xmlfeed_url);
    # check the response
    die("Error while getting " . $response->request->uri
        . "\nStatus-Line: " . $response->status_line . "\nAborting")
        unless $response->is_success;
    # copy the response data into a string
    $xmldata = $response->content;
}
else {
    # read xml data from a debug file (raw bytes; the parser decodes them)
    open my $fh_debug, '<', $debug_xmlfilename
        or die "Error opening debug XML file: $debug_xmlfilename ($!)";
    binmode $fh_debug;
    $xmldata = do { local $/; <$fh_debug> };
    close $fh_debug;
}

# RUN THE XML PARSER (PARSING IS DONE IN THE SUBROUTINES)
$xmlparser->parse($xmldata);

# WRITE THE PARSED DATA TO THE TEMP FILE
for my $hashref (@productarray) {
    print {$fh_outfile} join("\t", @{$hashref}{@feed_columns}) . "\n";
}

# debug print the parsed data
if (0) {
    for my $hashref (@productarray) {
        print "\n\n";
        for my $key (keys %$hashref) {
            print "'$key' => '$hashref->{$key}' \n";
        }
    }
}

# close the output file; buffered write errors only surface here
close $fh_outfile
    or die "Error closing file: $froogle_data_filename ($!)";

if ($debug) {
    $froogle_ftpserver = $debug_ftpserver;
    $froogle_username  = $debug_username;
    $froogle_password  = $debug_password;
}

# UPLOAD THE FROOGLE FORMAT DATA TO FROOGLE
my $ftp = Net::FTP->new($froogle_ftpserver, Timeout => 30)
    or die "Could not connect to FTP server: $froogle_ftpserver.\n";
$ftp->login($froogle_username, $froogle_password)
    or die "Could not log in to FTP server.\n";
$ftp->put($froogle_data_filename)
    or die "Could not upload $froogle_data_filename: " . $ftp->message;
$ftp->quit();

# END OF SCRIPT

# --------- XML ROUTINES ---------

# XML TAG START ROUTINE
# Start (Parser, Element [, Attr, Val [,...]])
# On every <Product> start tag, clears the working %product record and
# stores the tag's Id attribute as the product code.
sub tag_start {
    # Retrieve passed in values
    my ($expat_instance, $tagvalue, %attribute) = @_;

    return unless $tagvalue eq "Product";

    # reset the product data
    for my $field (qw(product_url name description price image_url orderable category)) {
        $product{$field} = "";
    }

    # get product code as the Id of the product tag; fall back to an empty
    # code so a product without an Id never inherits the previous one
    $product{code} = defined $attribute{Id} ? $attribute{Id} : "";

    return;
}

# XML CHARACTER DATA ROUTINE
# Char (Parser, String)
# Appends the character data to the %product field that corresponds to the
# element currently being parsed; data in any other element is ignored.
sub handle_char {
    # Retrieve passed in values
    my ($expat_instance, $tagvalue) = @_;

    # Retrieve context and convert the XML feed to froogle format
    my $context_path = join " ", $expat_instance->context;
    my $field        = $field_for_context{$context_path};

    # must be concatenation: text containing entities such as '>' or '&'
    # is delivered in pieces (image tags, category names)
    $product{$field} .= $tagvalue if defined $field;

    return;
}

# XML TAG END ROUTINE
# End (Parser, Element)
# At </Product>, cleans up the collected fields and queues the product for
# output when it has a price.  At </ProductRef>, appends the " > " separator
# used to build the category name.
sub tag_end {
    # Retrieve passed in values
    my ($expat_instance, $tagvalue) = @_;

    if ($tagvalue eq "Product") {
        # CLEAN UP THE PRODUCT DATA

        # trim off the trailing separator for the category
        $product{category} =~ s/ > \z//;

        # extract the URL for the image; the capture is taken straight from
        # this match so a failed match can never pick up a stale $1
        my ($image_src) = $product{image_url} =~ /src=(.*?)>/i;
        if ($image_src) {
            $product{image_url} = $image_src;
        }

        # replace all cr, lf, and tab with spaces
        # in all fields of the hash
        foreach my $key (keys %product) {
            if ($product{$key}) {
                $product{$key} =~ tr/\t\r\n/ /;
            }
        }

        # replace all html tags with empty string
        $product{description} =~ s/<.*?>//g;

        # PUSH THE PREVIOUS PRODUCT
        # do not push products without a price
        # these are either sections or unpriced items
        if ($product{price}) {
            push @productarray, {
                code        => $product{code},
                name        => $product{name},
                description => $product{description},
                product_url => $product{product_url},
                image_url   => $product{image_url},
                price       => $product{price},
                orderable   => $product{orderable},
                category    => $product{category},
            };
        }
    }

    # add separator for each ProductRef to create section name
    if ($tagvalue eq "ProductRef") {
        $product{category} .= " > ";
    }

    return;
}
Have you heard of the new, free Automated Feeds offered by Google Merchant Center?
Learn more in Aten Software's latest blog post comparing them to traditional data feed files.
Created 2005-04-24,
Last Modified 2018-01-25,
© Shailesh N. Humbad
Disclaimer: This content is provided as-is. The information may be incorrect.
Disclaimer: This content is provided as-is. The information may be incorrect.