# $Id: search.rb,v 1.141 2006/08/08 18:54:03 ianmacd Exp $
#
require 'amazon'
require 'amazon/search/cache'
require 'net/http'
require 'uri'
require 'rexml/document'
module Amazon
# Load this module with:
#
# require 'amazon/search'
#
# This module provides basic Amazon search operations.
#
module Search
# Perform a _HEAVY_ search when you want AWS to return all data that it
# has on a given search result.
#
HEAVY = true
# Perform a _LITE_ search when you just want a small subset of the data
# that AWS has for a given search result. See the AWS documentation for
# more details.
#
LITE = false
# _LIGHT_ has the same value as _LITE_ and may be used interchangeably.
#
LIGHT = false
# Use the special constant _ALL_PAGES_ when you are performing a search
# that accepts a page number as a parameter, but you want to retrieve
# _all_ pages, not just a single page.
#
ALL_PAGES = 0
# The following constants govern whether all editions of books are
# returned when performing Request#author_search, Request#keyword_search
# and Request#power_search.
#
ALL_EDITIONS = true
SINGLE_EDITION = false
# _RATE_LIMIT_REQUESTS_ must be +true+ for compliance with Amazon Web
# Services regulations, which stipulate no more than one search per
# second. When it is +true+, an _ALL_PAGES_ search sleeps for one second
# before requesting each additional page, unless the environment variable
# AMAZON_NO_RATE_LIMIT is set.
#
RATE_LIMIT_REQUESTS = true
# Maximum number of ASINs that can be handled by a _lite_ search.
# Enforced by Request#asin_search.
#
MAX_LITE_ASINS = 30
# Maximum number of ASINs that can be handled by a _heavy_ search.
# Enforced by Request#asin_search.
#
MAX_HEAVY_ASINS = 10
# Maximum number of UPCs that can be handled by a _lite_ search.
# Enforced by Request#upc_search.
#
MAX_LITE_UPCS = 30
# Maximum number of UPCs that can be handled by a _heavy_ search.
# Enforced by Request#upc_search.
#
MAX_HEAVY_UPCS = 10
# Maximum number of HTTP redirections (responses carrying a Location
# header, such as 301 and 302) to follow, should Amazon later decide to
# change the location of the service.
#
MAX_REDIRECTS = 3
# :stopdoc:
# Host name of the AWS server that serves each supported locale. Note
# that the *de* and *uk* locales are served by the same host.
#
LOCALES = {
  'ca' => 'xml.amazon.ca',
  'de' => 'xml-eu.amazon.com',
  'fr' => 'xml.amazon.fr',
  'jp' => 'xml.amazon.co.jp',
  'uk' => 'xml-eu.amazon.com',
  'us' => 'xml.amazon.com'
}
# Maps each US product mode to the mode string used by every non-US
# locale. A +nil+ entry means that the category is not available in that
# locale (see Request#localise_mode, which rejects such combinations).
#
MODES = {
  'books'         => { 'uk' => 'books-uk', 'de' => 'books-de',
                       'jp' => 'books-jp', 'fr' => 'books-fr',
                       'ca' => 'books-ca' },
  'music'         => { 'uk' => 'music', 'de' => 'pop-music-de',
                       'jp' => 'music-jp', 'fr' => 'music-fr',
                       'ca' => 'music-ca' },
  'classical'     => { 'uk' => 'classical', 'de' => 'classical-de',
                       'jp' => 'classical-jp', 'fr' => 'classical-fr',
                       'ca' => 'classical-ca' },
  'dvd'           => { 'uk' => 'dvd-uk', 'de' => 'dvd-de',
                       'jp' => 'dvd-jp', 'fr' => 'dvd-fr',
                       'ca' => 'dvd-ca' },
  'vhs'           => { 'uk' => 'vhs-uk', 'de' => 'vhs-de',
                       'jp' => 'vhs-jp', 'fr' => 'vhs-fr',
                       'ca' => 'vhs-ca' },
  'video'         => { 'uk' => 'video', 'de' => 'video',
                       'jp' => 'video-jp', 'fr' => nil,
                       'ca' => 'video' },
  'electronics'   => { 'uk' => 'electronics', 'de' => 'ce-de',
                       'jp' => 'electronics-jp', 'fr' => nil,
                       'ca' => nil },
  'kitchen'       => { 'uk' => 'kitchen-uk', 'de' => 'kitchen-de',
                       'jp' => nil, 'fr' => nil, 'ca' => nil },
  'software'      => { 'uk' => 'software-uk', 'de' => 'software-de',
                       'jp' => 'software-jp', 'fr' => 'software-fr',
                       'ca' => 'software-ca' },
  'videogames'    => { 'uk' => 'video-games-uk', 'de' => 'video-games-de',
                       'jp' => 'videogames-jp', 'fr' => 'video-games-fr',
                       'ca' => 'video-games-ca' },
  'magazines'     => { 'uk' => nil, 'de' => 'magazines-de',
                       'jp' => nil, 'fr' => nil, 'ca' => nil },
  'toys'          => { 'uk' => 'toys-uk', 'de' => nil,
                       'jp' => nil, 'fr' => nil, 'ca' => nil },
  'photo'         => { 'uk' => nil, 'de' => 'photo-de',
                       'jp' => nil, 'fr' => nil, 'ca' => nil },
  'baby'          => { 'uk' => nil, 'de' => nil,
                       'jp' => nil, 'fr' => nil, 'ca' => nil },
  'garden'        => { 'uk' => nil, 'de' => 'garden-de',
                       'jp' => nil, 'fr' => nil, 'ca' => nil },
  'pc-hardware'   => { 'uk' => nil, 'de' => 'pc-de',
                       'jp' => nil, 'fr' => nil, 'ca' => nil },
  'tools'         => { 'uk' => nil, 'de' => 'tools-de',
                       'jp' => nil, 'fr' => nil, 'ca' => nil },
  'english-books' => { 'uk' => nil, 'de' => 'books-de-intl-us',
                       'jp' => 'books-us', 'fr' => 'books-fr-intl-us',
                       'ca' => 'books-ca-en' }
  # ca has the additional category, 'books-ca-fr', for French
  # language books.
}
# The sort types that may be requested for each product mode. Modes that
# do not appear here have no known sort types.
#
SORT_TYPES = {
  'books'       => %w[+pmrank +salesrank +reviewrank +pricerank
                      +inverse-pricerank +daterank +titlerank -titlerank],
  'software'    => %w[+pmrank +salesrank +titlerank +price -price],
  'garden'      => %w[+psrank +salesrank +titlerank -titlerank
                      +manufactrank -manufactrank +price -price],
  'tools'       => %w[+psrank +salesrank +titlerank -titlerank
                      +manufactrank -manufactrank +price -price],
  'photo'       => %w[+pmrank +salesrank +titlerank -titlerank],
  'pc-hardware' => %w[+psrank +salesrank +titlerank -titlerank],
  'videogames'  => %w[+pmrank +salesrank +titlerank +price -price],
  'music'       => %w[+psrank +salesrank +artistrank +orig-rel-date
                      +titlerank],
  'office'      => %w[+pmrank +salesrank +titlerank -titlerank
                      +price -price +reviewrank],
  'video'       => %w[+psrank +salesrank +titlerank],
  'electronics' => %w[+pmrank +salesrank +titlerank +reviewrank],
  'dvd'         => %w[+salesrank +titlerank],
  'kitchen'     => %w[+pmrank +salesrank +titlerank -titlerank
                      +manufactrank -manufactrank +price -price],
  'toys'        => %w[+pmrank +salesrank +pricerank +inverse-pricerank
                      +titlerank]
}
# :startdoc:
# Returns a new Array holding the names of the valid product search
# modes:
#
# *apparel*, *baby*, *books*, *classical*, *dvd*, *electronics*, *garden*,
# *kitchen*, *magazines*, *music*, *pc-hardware*, *photo*, *software*,
# *tools*, *toys*, *universal*, *vhs*, *video*, *videogames*,
# *wireless-phones*
#
def Search.modes
  [
    'apparel', 'baby', 'books', 'classical', 'dvd', 'electronics',
    'garden', 'kitchen', 'magazines', 'music', 'pc-hardware', 'photo',
    'software', 'tools', 'toys', 'universal', 'vhs', 'video',
    'videogames', 'wireless-phones'
  ]
end
# Returns a new Array holding the names of the valid offer types:
#
# *All*, *ThirdPartyNew*, *Used*, *Collectible*, *Refurbished*
#
def Search.offer_types
  ['All', 'ThirdPartyNew', 'Used', 'Collectible', 'Refurbished']
end
# Looks up the sort types that are valid for _mode_. Returns the Array
# stored in SORT_TYPES, or +nil+ when _mode_ has no entry there.
#
def Search.sort_types(mode)
  SORT_TYPES.fetch(mode, nil)
end
# This is the class around which most others in this library revolve. It
# contains the most common search methods and exception classes and is the
# class from which most others in the library inherit.
#
class Request
# The developer token, the settings read from the configuration files and
# the current locale are read-only (use #locale= to switch locale).
attr_reader :token, :config, :locale
# The Associates ID and the response cache may be reassigned at any time,
# as shown in the example given for Request.new. (_id_ used to be
# read-only, which made the documented <tt>r.id = ...</tt> fail.)
attr_accessor :id, :cache
AWS_PREFIX = '/onca/xml3' # :nodoc:
# :stopdoc:
WEIGHT = { HEAVY => 'heavy',
LITE => 'lite'
}
# :startdoc:
# Raised when a configuration file contains a malformed line.
#
class ConfigError < StandardError
end
# Raised when a search request fails.
#
class SearchError < StandardError
end
# Raised when the developer token is unusable.
#
class TokenError < StandardError
end
# Raised when the search terms are unacceptable.
#
class TermError < StandardError
end
# Raised when the search mode is unacceptable.
#
class ModeError < StandardError
end
# Raised when the search type is unacceptable. Note that, inside Request,
# this class takes precedence over Ruby's built-in TypeError.
#
class TypeError < StandardError
end
# Raised when the offer type is unacceptable.
#
class OfferError < StandardError
end
# Raised when the locale is unacceptable.
#
class LocaleError < StandardError
end
# Raised when the sort type is unacceptable.
#
class SortError < StandardError
end
# Raised on HTTP errors (anything other than 200).
#
class HTTPError < StandardError
end
# Use this method to instantiate a basic search request object.
# _dev_token_ is your AWS developer
# token[https://associates.amazon.com/exec/panama/associates/join/developer/application.html],
# _associate_ is your
# Associates[https://associates.amazon.com/exec/panama/associates/apply/]
# ID, _locale_ is the search locale in which you wish to work (*us*,
# *uk*, *de*, *fr*, *ca* or *jp*), _cache_ is whether or not to utilise
# a response cache, and _user_agent_ is the name of the client you wish
# to pass when performing calls to AWS. _locale_ and _cache_ can also be
# set later, if you wish to change the current behaviour.
#
# For example:
#
# require 'amazon/search'
# include Amazon::Search
#
# r = Request.new('D23XFCO2UKJY82', 'foobar-20', 'us', false)
#
# # Do a bunch of things in the US locale with the cache off, then:
# #
# r.locale = 'uk' # Switch to the UK locale
# r.id = 'foobaruk-21' # Use a different Associates ID
# # in this locale.
# r.cache = Cache.new('/tmp/amazon') # Start using a cache.
#
# Note that reassigning the locale will dynamically and transparently
# set up a new HTTP connection to the correct server for that locale.
#
# You may also put one or more of these parameters in a configuration
# file, which will be read in the order of /etc/amazonrc and
# ~/.amazonrc. This allows you to have a system configuration
# file, but still override some of its directives in a per user
# configuration file.
#
# For example:
#
# dev_token = 'D23XFCO2UKJY82'
# associate = 'calibanorg-20'
# cache_dir = '/tmp/amazon/cache'
#
# If you do not provide an Associate ID, the one belonging to the author
# of the Ruby/Amazon library will be used. If _locale_ is not provided,
# *us* will be used. If _cache_ == +true+, but you do not specify a
# _cache_dir_ in a configuration file, /tmp/amazon will be used.
# However, this last convenience applies only when a Request object is
# instantiated. In other words, if you started off without a cache, but
# now wish to use one, you will need to directly assign a Cache object,
# as shown above.
#
# If your environment requires the use of an HTTP proxy server, you should
# define this in the environment variable $http_proxy.
# Ruby/Amazon will detect this and channel all outbound connections via
# your proxy server.
#
def initialize(dev_token=nil, associate=nil, locale=nil, cache=nil,
               user_agent = USER_AGENT)
  # Remember whether the caller chose a locale: a locale found in a
  # configuration file is only honoured when none was passed in.
  def_locale = locale
  # Lower-case a copy, so that the caller's string is never modified
  # (and a frozen string does not raise).
  locale = (locale || 'us').downcase

  configs = [ '/etc/amazonrc' ]
  configs << File.expand_path('~/.amazonrc') if ENV.key?('HOME')
  @config = {}

  configs.each do |config|
    next unless File.exist?(config) && File.readable?(config)

    Amazon::dprintf("Opening %s ...\n", config)
    File.readlines(config).each do |line|
      line.chomp!
      # Skip comments and blank lines
      next if line =~ /^(#|$)/

      Amazon::dprintf("Read: %s\n", line)

      # Expect key = value, where the value may be quoted, but then only
      # with the same quote character at both ends.
      match = line.match(/^(\S+)\s*=\s*(['"]?)([^'"]+)(['"]?)/)
      if match.nil? || match[2] != match[4]
        raise ConfigError, "bad config line: #{line}"
      end

      # Store these, because we'll probably find a use for these later
      @config[match[1]] = match[3]

      # Evaluate the line, setting the variable (e.g. dev_token,
      # associate or cache) only if it does not already have a value.
      #
      # NOTE(review): this evaluates configuration file content as Ruby
      # code. It is kept for backward compatibility, but anybody who can
      # write to /etc/amazonrc or ~/.amazonrc can run arbitrary code in
      # this process.
      eval line.sub(/=/, '||=')
    end
  end

  # take locale from config file if no locale was passed to method
  if @config.key?('locale') && ! def_locale
    locale = @config['locale'].downcase
  end
  validate_locale(locale)

  raise TokenError, 'dev_token may not be nil' if dev_token.nil?

  @token = dev_token
  @id = associate || DEFAULT_ID[locale]
  @user_agent = user_agent

  # Only an explicit +false+ disables the cache; +nil+ leaves it enabled.
  @cache = if cache == false
             nil
           else
             Amazon::Search::Cache.new(@config['cache_dir'] ||
                                       '/tmp/amazon')
           end

  # Assigning the locale also opens the HTTP connection.
  self.locale = locale
end
# Switch to locale _l_. Raises LocaleError if _l_ is not a known locale.
# If the Associates ID in use is the default for the previous locale, it
# is replaced by the default for the new one, and a new HTTP connection
# is opened when the new locale is served by a different host.
#
def locale=(l)
  @locale = nil unless defined?(@locale)
  previous = @locale
  @locale = validate_locale(l)

  # Only replace an ID that is the default of the locale being left.
  using_default_id = (@id == DEFAULT_ID[previous])
  @id = DEFAULT_ID[@locale] if using_default_id

  # Locales that share a server can share the existing connection.
  same_server = (LOCALES[@locale] == LOCALES[previous])
  connect(@locale) unless same_server
end
# Check that _l_ is one of the keys of LOCALES. Returns _l_ unchanged
# when it is, and raises LocaleError otherwise.
#
def validate_locale(l)
  return l if LOCALES.key?(l)

  raise LocaleError, "invalid locale: #{l}"
end
private :validate_locale
# Open an HTTP connection to the server for _locale_, store it in @conn
# and return it. If the environment variable $http_proxy is set, the
# connection is made via that proxy, using any user name and password
# embedded in the proxy URL. A connection opened by an earlier call is
# closed once the new one has been established.
#
def connect(locale)
  previous = defined?(@conn) ? @conn : nil

  if ENV.key? 'http_proxy'
    uri = URI.parse(ENV['http_proxy'])
    # URI#user and URI#password split on the first colon only, so a
    # password that itself contains ':' is passed on intact.
    @conn = Net::HTTP::Proxy(uri.host, uri.port,
                             uri.user,
                             uri.password).start(LOCALES[locale])
  else
    @conn = Net::HTTP::start(LOCALES[locale])
  end

  # Don't leak the socket of the connection being replaced.
  if previous && previous.started?
    begin
      previous.finish
    rescue IOError
      # already closed; nothing left to release
    end
  end

  @conn
end
private :connect
# Encode _string_, such that it is suitable for HTTP transmission. Every
# byte of each character other than letters, digits, '_', '.', '-' and
# space is replaced by %XX (upper-case hex), and spaces become '+'.
# Returns a new String; _string_ itself is not modified.
#
def url_encode(string)
  # Derived from Wakou Aoyama's cgi.rb. The escaping works on bytes
  # rather than characters, so that multi-byte characters are encoded in
  # full regardless of whether String#size counts bytes or characters.
  string.gsub(/[^ a-zA-Z0-9_.-]+/) do |unsafe|
    unsafe.unpack('C*').map { |byte| '%%%02X' % byte }.join
  end.tr(' ', '+')
end
private :url_encode
# Convert the US mode string _m_ into the mode string used by the current
# locale and return it. In the *us* locale, _m_ is returned unchanged.
#
# English-language books have their own special mode on non-English
# speaking sites, which is selected with the pseudo-mode 'english-books'.
# That pseudo-mode is therefore not valid in the *us* locale.
#
# Raises ModeError if _m_ is not available in the current locale.
#
def localise_mode(m)
  if @locale == 'us'
    if m == 'english-books'
      raise ModeError, "Invalid mode '#{m}' in locale '#{@locale}'"
    end
    return m
  end

  # Some valid US modes (e.g. 'apparel') have no entry in MODES at all;
  # treat those like modes whose entry for this locale is nil.
  localised = (MODES[m] || {})[@locale]
  if localised.nil?
    raise ModeError, "Invalid mode '#{m}' in locale '#{@locale}'"
  end
  localised
end
private :localise_mode
# Build the URL fragment that requests offers of type _offer_. Returns an
# empty String when _offer_ is +nil+. Otherwise, the search is forced to
# be a heavy one (via @type) and "&offer=..." is returned.
#
# Raises LocaleError in the *de* locale and OfferError if _offer_ is not
# one of Search.offer_types.
#
def get_offer_string(offer=nil)
  return "" if offer.nil?

  if @locale == 'de'
    raise LocaleError, "search type invalid in '#{@locale}' locale"
  end

  valid_offers = Search.offer_types
  unless valid_offers.include?(offer)
    raise OfferError, "'offerings' must be one of %s" %
                      Search.offer_types.join(', ')
  end

  @type = WEIGHT[HEAVY]
  "&offer=" + offer
end
private :get_offer_string
# Build the URL fragment that requests results sorted by _sort_type_.
# Returns an empty String when _sort_type_ is +nil+, and "&sort=..."
# otherwise.
#
# Raises SortError if _sort_type_ is not valid for _mode_. This includes
# modes for which no sort types are known at all.
#
def get_sort_string(sort_type, mode)
  return "" if sort_type.nil?

  # Search.sort_types returns nil for a mode without a sort table.
  valid_types = Search.sort_types(mode) || []
  unless valid_types.include? sort_type
    raise SortError,
          "invalid sort type '#{sort_type}' for mode #{mode}"
  end

  "&sort=" + url_encode(sort_type)
end
private :get_sort_string
# Search for a product by actor and return an Amazon::Search::Response.
# If a block is given, each of that Response's @products will be passed
# to the block.
#
# _mode_ must be one of *dvd*, *vhs* or *video*, otherwise ModeError is
# raised. When _offerings_ is given, a heavy search is always performed,
# whatever the value of _weight_.
#
# E.g.
#
# resp = req.actor_search('ricky gervais', 'dvd', LITE, 1,
# '+titlerank', 'ThirdPartyNew')
#
def actor_search(actor, mode='dvd', weight=HEAVY, page=1,
                 sort_type=nil, offerings=nil, keyword=nil,
                 price=nil, &block)
  url = AWS_PREFIX + "?t=%s&ActorSearch=%s&mode=%s&f=xml" +
        "&type=%s&dev-t=%s&page=%s"
  url << "&price=" << price.to_s unless price.nil?

  # Set the requested weight first: get_offer_string upgrades @type to
  # heavy when offerings are requested and must not be overruled.
  @type = WEIGHT[weight]
  url << get_offer_string(offerings)

  sort_string = get_sort_string(sort_type, mode)
  actor = url_encode(actor)

  modes = %w[dvd vhs video]
  unless modes.include? mode
    raise ModeError, "mode must be one of %s" % modes.join(', ')
  end
  mode = localise_mode(mode)

  # Encoded values are only added after the format string has been
  # filled in, because they may contain '%'.
  url = url % [@id, actor, mode, @type, @token, page] << sort_string
  url << "&keywords=" << url_encode(keyword) unless keyword.nil?

  search(url, &block)
end
# Search for a product by artist and return an Amazon::Search::Response.
# If a block is given, each of that Response's @products will be passed
# to the block.
#
# _mode_ must be *music* or *classical*, otherwise ModeError is raised.
# When _offerings_ is given, a heavy search is always performed, whatever
# the value of _weight_.
#
def artist_search(artist, mode='music', weight=HEAVY, page=1,
                  sort_type=nil, offerings=nil, keyword=nil,
                  price=nil, &block)
  url = AWS_PREFIX + "?t=%s&ArtistSearch=%s&mode=%s&f=xml" +
        "&type=%s&dev-t=%s&page=%s"
  url << "&price=" << price.to_s unless price.nil?

  # Set the requested weight first: get_offer_string upgrades @type to
  # heavy when offerings are requested and must not be overruled.
  @type = WEIGHT[weight]
  url << get_offer_string(offerings)

  sort_string = get_sort_string(sort_type, mode)
  artist = url_encode(artist)

  modes = %w[music classical]
  unless modes.include? mode
    raise ModeError, "mode must be one of %s" % modes.join(', ')
  end
  mode = localise_mode(mode)

  url = url % [@id, artist, mode, @type, @token, page] << sort_string
  url << "&keywords=" << url_encode(keyword) unless keyword.nil?

  search(url, &block)
end
# Search for a product by ASIN(s) and return an
# Amazon::Search::Response. If a block is given, each of that Response's
# @products will be passed to the block. The _offer_page_ parameter is
# ignored unless _offerings_ is not +nil+.
#
# _asin_ may be an Array of ASINs or a String of ASINs separated by
# spaces or commas; it is not modified. TermError is raised when more
# ASINs are given than a search of the requested _weight_ can handle
# (MAX_HEAVY_ASINS or MAX_LITE_ASINS).
#
def asin_search(asin, weight=HEAVY, offer_page=nil, offerings=nil,
                &block)
  url = AWS_PREFIX + "?t=%s&AsinSearch=%s&f=xml&type=%s&dev-t=%s"
  @type = WEIGHT[weight]

  unless offerings.nil?
    url << get_offer_string(offerings)
    url << "&offerpage=%s" % (offer_page || 1)
  end

  # Build the comma-separated list in a new String, so that the caller's
  # argument is left alone (it may well be frozen).
  asins = asin.is_a?(Array) ? asin.join(',') : asin.to_s.tr(' ', ',')

  # n ASINs are joined by n - 1 commas.
  if asins.count(',') >= (weight ? MAX_HEAVY_ASINS : MAX_LITE_ASINS)
    raise TermError, "too many ASINs"
  end

  search(url % [@id, asins, @type, @token], &block)
end
# Search for a book by author and return an Amazon::Search::Response. If
# a block is given, each of that Response's @products will be passed to
# the block.
#
# _mode_ must be *books*, otherwise ModeError is raised. Pass
# ALL_EDITIONS as _editions_ to have all editions of a book returned.
# When _offerings_ is given, a heavy search is always performed, whatever
# the value of _weight_.
#
# NOTE(review): _keyword_ is accepted, but is not sent to AWS by this
# method -- confirm whether that is intentional.
#
def author_search(author, mode='books', weight=HEAVY, page=1,
                  sort_type=nil, offerings=nil, keyword=nil,
                  price=nil, editions=SINGLE_EDITION, &block)
  url = AWS_PREFIX + "?t=%s&AuthorSearch=%s&mode=%s&f=xml" +
        "&type=%s&dev-t=%s&page=%s"
  url << "&price=" << price.to_s unless price.nil?
  url << "&variations=yes" if editions == ALL_EDITIONS

  # Set the requested weight first: get_offer_string upgrades @type to
  # heavy when offerings are requested and must not be overruled.
  @type = WEIGHT[weight]
  url << get_offer_string(offerings)

  sort_string = get_sort_string(sort_type, mode)
  author = url_encode(author)

  raise ModeError, 'mode must be books' unless mode == 'books'
  mode = localise_mode(mode)

  url = url % [@id, author, mode, @type, @token, page] << sort_string
  search(url, &block)
end
# Search for a product by director and return an
# Amazon::Search::Response. If a block is given, each of that Response's
# @products will be passed to the block.
#
# _mode_ must be one of *dvd*, *vhs* or *video*, otherwise ModeError is
# raised. When _offerings_ is given, a heavy search is always performed,
# whatever the value of _weight_.
#
def director_search(director, mode='dvd', weight=HEAVY, page=1,
                    sort_type=nil, offerings=nil, keyword=nil,
                    price=nil, &block)
  url = AWS_PREFIX + "?t=%s&DirectorSearch=%s&mode=%s&f=xml" +
        "&type=%s&dev-t=%s&page=%s"
  url << "&price=" << price.to_s unless price.nil?

  # Set the requested weight first: get_offer_string upgrades @type to
  # heavy when offerings are requested and must not be overruled.
  @type = WEIGHT[weight]
  url << get_offer_string(offerings)

  sort_string = get_sort_string(sort_type, mode)
  director = url_encode(director)

  modes = %w[dvd vhs video]
  unless modes.include? mode
    raise ModeError, "mode must be one of %s" % modes.join(', ')
  end
  mode = localise_mode(mode)

  url = url % [@id, director, mode, @type, @token, page] << sort_string
  # The keywords are added once only, and only after the format string
  # has been filled in, because the encoded keywords may contain '%'.
  url << "&keywords=" << url_encode(keyword) unless keyword.nil?

  search(url, &block)
end
# Search for a product by keyword(s) and return an
# Amazon::Search::Response. If a block is given, each of that Response's
# @products will be passed to the block.
#
# _mode_ must be one of Search.modes, otherwise ModeError is raised. Pass
# ALL_EDITIONS as _editions_ to have all editions of a book returned.
# When _offerings_ is given, a heavy search is always performed, whatever
# the value of _weight_.
#
def keyword_search(keyword, mode='books', weight=HEAVY, page=1,
                   sort_type=nil, offerings=nil, price=nil,
                   editions=SINGLE_EDITION, &block)
  url = AWS_PREFIX + "?t=%s&KeywordSearch=%s&mode=%s&f=xml" +
        "&type=%s&dev-t=%s&page=%s"
  url << "&price=" << price.to_s unless price.nil?
  url << "&variations=yes" if editions == ALL_EDITIONS

  # Set the requested weight first: get_offer_string upgrades @type to
  # heavy when offerings are requested and must not be overruled.
  @type = WEIGHT[weight]
  url << get_offer_string(offerings)

  sort_string = get_sort_string(sort_type, mode)
  keyword = url_encode(keyword)

  unless Search.modes.include? mode
    raise ModeError, "mode must be one of %s" % Search.modes.join(', ')
  end
  mode = localise_mode(mode)

  url = url % [@id, keyword, mode, @type, @token, page] << sort_string
  search(url, &block)
end
# Fetch the products on the Listmania list identified by _list_id_ and
# return them as an Amazon::Search::Response. If a block is given, each
# of that Response's @products will be passed to the block.
#
# Raises TermError unless _list_id_ is 12 or 13 characters long.
#
def listmania_search(list_id, weight=HEAVY, &block)
  template = AWS_PREFIX + "?t=%s&ListManiaSearch=%s&f=xml&type=%s&dev-t=%s"
  @type = WEIGHT[weight]

  id_length = list_id.length
  if id_length < 12 || id_length > 13
    raise TermError, "list ID length must be 12 or 13 characters"
  end

  search(template % [@id, list_id, @type, @token], &block)
end
# Search for a product by manufacturer and return an
# Amazon::Search::Response. If a block is given, each of that Response's
# @products will be passed to the block.
#
# The first parameter is the name of the manufacturer. _mode_ must be one
# of *electronics*, *kitchen*, *videogames*, *software*, *photo* or
# *pc-hardware*, otherwise ModeError is raised. When _offerings_ is
# given, a heavy search is always performed, whatever the value of
# _weight_.
#
def manufacturer_search(director, mode='electronics', weight=HEAVY,
                        page=1, sort_type=nil, offerings=nil,
                        keyword=nil, price=nil, &block)
  url = AWS_PREFIX + "?t=%s&ManufacturerSearch=%s&mode=%s" +
        "&f=xml&type=%s&dev-t=%s&page=%s"
  url << "&price=" << price.to_s unless price.nil?

  # Set the requested weight first: get_offer_string upgrades @type to
  # heavy when offerings are requested and must not be overruled.
  @type = WEIGHT[weight]
  url << get_offer_string(offerings)

  sort_string = get_sort_string(sort_type, mode)
  manufacturer = url_encode(director)

  modes = %w[electronics kitchen videogames software
             photo pc-hardware]
  unless modes.include? mode
    raise ModeError, "mode must be one of %s" % modes.join(', ')
  end
  mode = localise_mode(mode)

  url = url % [@id, manufacturer, mode, @type, @token, page] << sort_string
  url << "&keywords=" << url_encode(keyword) unless keyword.nil?

  search(url, &block)
end
# Search for a product by browse node. The default of '1000' is for
# best-selling books. Returns an Amazon::Search::Response. If a block is
# given, each of that Response's @products will be passed to the block.
#
# _browse_node_ must be a single node, given as a String or an Integer:
# TypeError is raised for an Array and TermError for a value containing
# a space. _mode_ must be one of Search.modes, otherwise ModeError is
# raised. When _offerings_ is given, a heavy search is always performed,
# whatever the value of _weight_.
#
def node_search(browse_node='1000', mode='books', weight=HEAVY, page=1,
                sort_type=nil, offerings=nil, keyword=nil, price=nil,
                &block)
  url = AWS_PREFIX + "?t=%s&BrowseNodeSearch=%s&mode=%s&f=xml" +
        "&type=%s&dev-t=%s&page=%s"
  url << "&price=" << price.to_s unless price.nil?

  # Set the requested weight first: get_offer_string upgrades @type to
  # heavy when offerings are requested and must not be overruled.
  @type = WEIGHT[weight]
  url << get_offer_string(offerings)

  sort_string = get_sort_string(sort_type, mode)

  if browse_node.is_a? Array
    raise TypeError, "string or integer required"
  elsif browse_node.to_s =~ / /
    # to_s, because an Integer cannot be matched against a Regexp
    raise TermError, "single item expected"
  end

  unless Search.modes.include? mode
    raise ModeError, "mode must be one of %s" % Search.modes.join(', ')
  end
  mode = localise_mode(mode)

  url =
    url % [@id, browse_node, mode, @type, @token, page] << sort_string
  url << "&keywords=" << url_encode(keyword) unless keyword.nil?

  search(url, &block)
end
# Search for a book, using a power search, and return an
# Amazon::Search::Response. If a block is given, each of that Response's
# @products will be passed to the block.
#
# _mode_ must be *books*, otherwise ModeError is raised. Pass
# ALL_EDITIONS as _editions_ to have all editions of a book returned.
# When _offerings_ is given, a heavy search is always performed, whatever
# the value of _weight_.
#
def power_search(query, mode='books', weight=HEAVY, page=1,
                 sort_type=nil, offerings=nil, editions=SINGLE_EDITION,
                 &block)
  url = AWS_PREFIX + "?t=%s&PowerSearch=%s&mode=%s&f=xml" +
        "&type=%s&dev-t=%s&page=%s"
  url << "&variations=yes" if editions == ALL_EDITIONS

  # Set the requested weight first: get_offer_string upgrades @type to
  # heavy when offerings are requested and must not be overruled.
  @type = WEIGHT[weight]
  url << get_offer_string(offerings)

  sort_string = get_sort_string(sort_type, mode)
  query = url_encode(query)

  raise ModeError, 'mode must be books' unless mode == 'books'
  mode = localise_mode(mode)

  url = url % [@id, query, mode, @type, @token, page] << sort_string
  search(url, &block)
end
# Search for a product's similar products and return an
# Amazon::Search::Response. If a block is given, each of that Response's
# @products will be passed to the block.
#
# _asin_ may be an Array of ASINs or a String of ASINs separated by
# spaces or commas; it is not modified. TermError is raised when more
# than five ASINs are given.
#
def similarity_search(asin, weight=HEAVY, page=1, &block)
  url = AWS_PREFIX + "?t=%s&SimilaritySearch=%s&f=xml" +
        "&type=%s&dev-t=%s&page=%s"
  @type = WEIGHT[weight]

  # Build the comma-separated list in a new String, so that the caller's
  # argument is left alone (it may well be frozen).
  asins = asin.is_a?(Array) ? asin.join(',') : asin.to_s.tr(' ', ',')

  # n ASINs are joined by n - 1 commas.
  if asins.count(',') >= 5
    raise TermError, "too many ASINs (max. 5 for this search)"
  end

  search(url % [@id, asins, @type, @token, page], &block)
end
# Perform a text stream search and return an Amazon::Search::Response.
# If a block is given, each of that Response's @products will be passed
# to the block.
#
# This search type is available in the *us* locale only; LocaleError is
# raised elsewhere. _mode_ must be one of *electronics*, *books*,
# *videogames*, *apparel*, *toys*, *photo*, *music*, *dvd* or
# *wireless-phones*, otherwise ModeError is raised. A few common English
# words are removed from a copy of _text_stream_ before it is sent;
# _text_stream_ itself is not modified.
#
# Note that, unlike in the other searches, _sort_type_ precedes _page_.
#
def text_stream_search(text_stream, mode='books', weight=HEAVY,
                       sort_type=nil, page=1, &block)
  # this search type not available for international sites
  unless @locale == 'us'
    raise LocaleError, "search type invalid in '#{@locale}' locale"
  end

  url = AWS_PREFIX + "?t=%s&TextStreamSearch=%s&mode=%s&f=xml" +
        "&type=%s&dev-t=%s&page=%s"
  @type = WEIGHT[weight]
  sort_string = get_sort_string(sort_type, mode)

  modes = %w[electronics books videogames apparel toys photo
             music dvd wireless-phones]
  unless modes.include? mode
    raise ModeError, "mode must be one of %s" % modes.join(', ')
  end

  # strip a few useless words from a copy of the text stream, so that
  # the caller's String is left alone (it may well be frozen)
  stripped = text_stream.dup
  %w[and or not the a an but to for of on at].each do |particle|
    stripped.gsub!(/\b#{particle}\b/, '')
  end
  stripped = url_encode(stripped)

  url =
    url % [@id, stripped, mode, @type, @token, page] << sort_string
  search(url, &block)
end
# Search for a product by UPC code(s) and return an
# Amazon::Search::Response. If a block is given, each of that Response's
# @products will be passed to the block.
#
# This search type is available in the *us* locale only; LocaleError is
# raised elsewhere. _upc_ may be an Array of UPCs or a String of UPCs
# separated by spaces or commas; it is not modified. TermError is raised
# when more UPCs are given than a search of the requested _weight_ can
# handle (MAX_HEAVY_UPCS or MAX_LITE_UPCS). _mode_ must be one of
# *music*, *classical*, *software*, *dvd*, *vhs*, *video*, *electronics*,
# *pc-hardware* or *photo*, otherwise ModeError is raised.
#
def upc_search(upc, mode='music', weight=HEAVY, &block)
  unless @locale == 'us'
    raise LocaleError, "search type invalid in '#{@locale}' locale"
  end

  url = AWS_PREFIX + "?t=%s&UpcSearch=%s&mode=%s&f=xml&type=%s&dev-t=%s"
  @type = WEIGHT[weight]

  # Build the comma-separated list in a new String, so that the caller's
  # argument is left alone (it may well be frozen).
  upcs = upc.is_a?(Array) ? upc.join(',') : upc.to_s.tr(' ', ',')

  # n UPCs are joined by n - 1 commas.
  if upcs.count(',') >= (weight ? MAX_HEAVY_UPCS : MAX_LITE_UPCS)
    raise TermError, "too many UPCs"
  end

  modes = %w[music classical software dvd vhs video
             electronics pc-hardware photo]
  unless modes.include? mode
    raise ModeError, "mode must be one of %s" % modes.join(', ')
  end
  mode = localise_mode(mode)

  search(url % [@id, upcs, mode, @type, @token], &block)
end
# Fetch the products on the wishlist identified by _list_id_ and return
# them as an Amazon::Search::Response. If a block is given, each of that
# Response's @products will be passed to the block.
#
# Raises TermError unless _list_id_ is 12 or 13 characters long.
#
def wishlist_search(list_id, weight=HEAVY, page=1, &block)
  template = AWS_PREFIX + "?t=%s&WishlistSearch=%s&f=xml" +
             "&type=%s&dev-t=%s&page=%s"
  @type = WEIGHT[weight]

  id_length = list_id.length
  if id_length < 12 || id_length > 13
    raise TermError, "list ID length must be 12 or 13 characters"
  end

  # NB: #search must be called directly from this method, because it
  # inspects its caller in order to recognise wishlist searches.
  search(template % [@id, list_id, @type, @token, page], &block)
end
# Catch calls to methods that do not exist, which are taken to be
# requests for non-existent or unimplemented search types, and report
# them by raising TypeError with the name of the method in the message.
#
def method_missing(*params)
  requested = params.first.id2name
  raise TypeError,
        "non-existent/unimplemented search type: " + requested
end
private :method_missing
# Fetch the page at _url_ (a path plus query string on the server of the
# current connection) and return its body. A cached copy is returned, if
# there is one. Up to MAX_REDIRECTS HTTP redirections are followed.
#
# Raises HTTPError if there are too many redirections or if the final
# response code is anything other than 200.
#
def get_page(url) # :nodoc:
  # check for cached page and return that if it's there
  if @cache && @cache.cached?(url)
    body = @cache.get_cached(url)
    return body if body
  end

  Amazon::dprintf("Fetching http://%s%s ...\n", @conn.address, url)
  headers = { 'user-agent' => @user_agent }
  response = @conn.get(url, headers)

  location = nil
  redirects = 0
  while response.key?('location')
    if (redirects += 1) > MAX_REDIRECTS
      raise HTTPError, "More than #{MAX_REDIRECTS} redirections"
    end

    # Resolve the new location against the URL just requested, so that
    # relative redirections work, too.
    location ||= URI.parse("http://#{@conn.address}:#{@conn.port}#{url}")
    location = location.merge(response['location'])
    Amazon::dprintf("Following HTTP %s to %s ...\n", response.code,
                    location)

    # request_uri, unlike path, includes the query string. The block
    # form closes this one-off connection again.
    # NOTE(review): a redirection to an https URL is not supported here.
    response = Net::HTTP.start(location.host, location.port) do |http|
      http.get(location.request_uri, headers)
    end
  end

  if response.code != '200'
    raise HTTPError, "HTTP response code #{response.code}"
  end

  # cache the page if we're using a cache and it's not the result of
  # a shopping cart transaction; the key is always the URL that was
  # originally requested, as that is what is looked up above
  if @cache && ! is_a?(Amazon::ShoppingCart)
    @cache.cache(url, response.body)
  end

  response.body
end
# Perform the actual search: append the locale to _url_, fetch the page
# and wrap it in the Response class that matches the class of this
# request.
#
# If the page number in _url_ is ALL_PAGES, every page of results is
# retrieved and an Array of Responses, sorted on page number, is
# returned. That Array has a singleton method, +products+, which returns
# the products of all pages. Otherwise, a single Response is returned.
# In both cases, each product is passed to the block, if one is given.
#
# Raises SearchError if the number of pages cannot be determined or if
# not all pages could be retrieved.
#
def search(url, &block) # :nodoc:
  url << "&locale=" << @locale

  # determine whether we need to retrieve all pages
  all_pages = url.sub!(/page=#{ALL_PAGES}/, 'page=1')

  # get the page
  body = get_page(url)

  body = case self
         when Amazon::Search::Exchange::Marketplace::Request
           Exchange::Marketplace::Response.new(body)
         when Amazon::Search::Exchange::ThirdParty::Request
           Exchange::ThirdParty::Response.new(body)
         when Amazon::Search::Exchange::Request
           Exchange::Response.new(body)
         when Amazon::Search::Blended::Request
           Blended::Response.new(body)
         when Amazon::Search::Seller::Request
           Seller::Response.new(body)
         when Amazon::ShoppingCart
           Amazon::ShoppingCart::Response.new(body)
         else # must be Amazon::Search::Request
           Response.new(body)
         end

  # FIXME: This is a gross little hack to return wishlists in
  # descending order. The pattern accepts both the old backtrace label
  # (`wishlist_search') and the newer one ('Class#wishlist_search').
  # The products are reversed in place, because a new Array would lack
  # the total_results and total_pages accessors.
  #
  if caller[0] =~ /[`'](?:\S*[#.])?wishlist_search'$/
    body.products.reverse!
  end

  unless all_pages
    # return a single Response
    body.products.each(&block) if block_given?
    return body
  end

  total_pages = begin
                  body.products.total_pages
                rescue StandardError
                  nil
                end
  if total_pages.nil?
    raise SearchError, 'failed to determine total number of pages'
  end

  responses = [body]
  threads = []

  # A sub-request must not turn a disabled cache back on: only +false+
  # disables the cache in Request.new.
  cache_arg = @cache || false

  # Get second and subsequent pages in parallel
  # FIXME: why is this not faster?
  #
  2.upto(total_pages) do |page_nr|
    # be nice to Amazon
    sleep 1 if RATE_LIMIT_REQUESTS &&
               ! ENV.key?('AMAZON_NO_RATE_LIMIT')

    # Every thread gets a URL String of its own. Rewriting one shared
    # String would let threads overwrite each other's page number.
    page_url = url.sub(/page=\d+/) { "page=#{page_nr}" }

    threads << Thread.new(page_url) do |paged_url|
      req = Request.new(@token, @id, @locale, cache_arg, @user_agent)

      # go on and get next body, appending to our list
      responses << Response.new(req.get_page(paged_url))
    end
  end

  threads.each { |t| t.join }

  if responses.size != total_pages
    raise SearchError, "Failed to get all pages"
  end

  # Define a singleton method for this Array, so that we can retrieve
  # all products of each Response in a single method call. I.e.
  # there's no need to do something like this:
  #
  # responses.each { |r| r.products.each { |p| puts p } }
  #
  # Instead, we can do this:
  #
  # responses.products.each { |p| puts p }
  #
  def responses.products
    # flatten, rather than flatten!, which returns nil when there is
    # nothing to flatten
    map { |page| page.products }.flatten
  end

  # return an Array of Responses, sorted on page number
  responses.sort! { |a,b| a.args['page'].to_i <=> b.args['page'].to_i }
  responses.products.each(&block) if block_given?
  responses
end
private :search
end
class Response < String
# The File or REXML::Element from which this Response was built, or +nil+
# if it was built from a String.
attr_reader :stream # :nodoc:
# Hash of the request arguments echoed back in the response (name =>
# value), as collected by #get_args.
attr_reader :args
#--
# FIXME: This can become a reader method if wishlists ever revert to
# being returned in descending order.
#++
# Array of the products parsed from the response. It is writable only so
# that Request#search can reverse the order of wishlist results.
attr_accessor :products
# Build a Response from _stream_ and parse it. _stream_ may be:
#
# * an open File, whose remaining content becomes the content of this
#   String and which is then closed;
# * a REXML::Element, which is parsed directly (this String stays empty);
# * anything else, which is passed to String's own initializer (normally
#   a String of XML).
#
def initialize(stream)
  @args = {}
  @error = nil
  @stream = nil

  if stream.is_a? File
    # we were passed an open file handle -- slurp it as a string; read
    # returns "" for an empty file, and the handle is closed even if
    # something goes wrong
    @stream = stream
    begin
      super(stream.read)
    ensure
      @stream.close
    end
  elsif stream.is_a? REXML::Element
    @stream = stream
  else # String
    super(stream)
  end

  parse
end
# Collect the request arguments found below _node_ (Request/Args/Arg)
# into @args and check the response for problems.
#
# Raises Amazon::Search::Request::SearchError if _detail_node_ has no
# child elements, if an error message (ErrorMsg or ErrorMessage) is
# present, or if no arguments were found and the response was not read
# from a File.
#
def get_args(node, detail_node=node) # :nodoc:
  node.elements.each('Request/Args/Arg') do |arg|
    @args[arg.attributes['name']] = arg.attributes['value']
  end

  Amazon::dprintf("Response args = %s\n", @args.inspect)

  # Check for the presence of actual results.
  #
  unless detail_node.has_elements?
    @error = "empty result set"
    raise Amazon::Search::Request::SearchError, @error
  end

  # The presence of ErrorMsg in _node_ decides which element of
  # _detail_node_ holds the message. If that element is missing, AWS has
  # not reported an error on their side.
  error_tag = node.elements['ErrorMsg'].nil? ? 'ErrorMessage' : 'ErrorMsg'
  failure = detail_node.elements[error_tag]
  unless failure.nil?
    @error = failure.text
    raise Amazon::Search::Request::SearchError, @error
  end

  return unless @args.empty? && ! @stream.is_a?(File)

  raise Amazon::Search::Request::SearchError,
        "response contained no arguments"
end
# Convert _str_ from CamelCase to ruby_case: an underscore is inserted
# after every lower-case letter that has a character before it and an
# upper-case letter after it, and the result is then lower-cased.
#
def uncamelise(str)
  underscored = str.gsub(/(.[a-z])(?=[A-Z])/) { |pair| pair + '_' }
  underscored.downcase
end
private :uncamelise
# Parse the XML of this response and fill @products with one Product per
# Details element. Each property of a product is stored in an instance
# variable of the Product, named after the XML element, but in ruby_case.
#
# @products gains the accessors +total_results+ and +total_pages+, which
# are +nil+ unless the response supplies both figures, and an +inspect+
# that shows them.
#
# Returns this Response.
#
def parse
  @products = []

  # create a singleton #inspect for looking at this Array, including
  # its instance variables for TotalResults and TotalPages.
  #
  class << @products # :nodoc:
    attr_accessor :total_results, :total_pages
    alias_method :old_inspect, :inspect

    def inspect
      str = ""
      unless @total_pages.nil?
        str << "@total_pages=#{@total_pages}, "
        str << "@total_results=#{@total_results},\n"
      end
      str << old_inspect
    end
  end

  if @stream.nil? || @stream.is_a?(File)
    doc = REXML::Document.new(self).elements['ProductInfo']
    # populate @args with header data
    get_args(doc) if @args.empty?
  else
    doc = @stream
  end

  # The totals are only set when both of them are present.
  results_node = doc.elements['TotalResults']
  pages_node = doc.elements['TotalPages']
  if results_node && pages_node
    @products.total_results = results_node.text.to_i
    @products.total_pages = pages_node.text.to_i
  else
    @products.total_results = nil
    @products.total_pages = nil
  end

  doc.elements.each('Details') do |detail|
    product = Product.new(detail.attributes['url'])

    detail.elements.each do |property|
      if property.has_elements?
        case property.name
        # deal with elements that have more than one sub-level
        when 'BrowseList'
          browsenames = property.elements.map do |node|
            node.elements.map { |name| name.text }
          end.flatten
          product.instance_variable_set(:@browse_list, browsenames)
        when 'Reviews'
          # can be either AverageCustomerRating or AverageRating
          avg = property.elements[1].text.to_f
          tcr = property.elements[2].text.to_i
          # Only the children that have children of their own are
          # reviews; the others yield an empty list and are skipped.
          list = property.elements.map do |child|
            child.elements.map { |field| field.text }
          end
          reviews = []
          list.each do |r|
            reviews << Product::Review.new(*r) unless r.empty?
          end
          product.instance_variable_set(:@average_customer_rating, avg)
          product.instance_variable_set(:@total_customer_reviews, tcr)
          product.instance_variable_set(:@reviews, reviews)
        when 'ThirdPartyProductInfo' # Offerings returns these
          info = []
          property.elements.each do |party|
            tpi = Product::ThirdPartyInfo.new
            party.elements.each do |field|
              iv, value = field.name, field.text
              # nil.to_i is 0, so an empty element stays nil
              value = value.to_i if value.to_i > 0
              unless value.nil?
                # normalise instance variable's name
                iv = uncamelise(iv)
                tpi.instance_variable_set("@#{iv}".to_sym, value)
              end
            end
            info << tpi
          end
          product.instance_variable_set(:@third_party_product_info, info)
        else # deal with the rest
          members = property.elements.map { |member| member.text }
          iv = uncamelise(property.name)
          product.instance_variable_set("@#{iv}".to_sym, members)
        end
      else # these elements have no children
        value = property.text
        # An empty element has no text (nil), which is stored as it is.
        if value && property.name =~ /Num|Rank/
          value = value.gsub(/,/, '').to_i
        end
        iv = uncamelise(property.name)
        product.instance_variable_set("@#{iv}".to_sym, value)
      end
    end

    @products << product
  end

  self
end
private :parse
end
end
end
require 'amazon/search/blended'
require 'amazon/search/exchange'
require 'amazon/search/exchange/marketplace'
require 'amazon/search/exchange/thirdparty'
require 'amazon/search/seller'
require 'amazon/shoppingcart'