diff --git a/LICENSE b/LICENSE index f3e2506..cc631e0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,7 +1,8 @@ BSD-style license ================= -Copyright (c) 2010, Michael Stephens +Copyright (c) 2010 Michael Stephens +Copyright (c) 2012-2013 Michael Smith All rights reserved. diff --git a/README.rst b/README.rst index 690c1ac..486aaaf 100644 --- a/README.rst +++ b/README.rst @@ -2,11 +2,18 @@ python-duckduckgo ================== -A Python library for querying the Duck Duck Go API. +A Python library for querying the DuckDuckGo API. -Copyright Michael Stephens , released under a BSD-style license. +Copyright (c) 2010 Michael Stephens +Copyright (c) 2012-2013 Michael Smith -Source: http://github.com/mikejs/python-duckduckgo +Released under a 3-clause BSD license, see LICENSE for details. + +Latest Source: http://github.com/crazedpsyc/python-duckduckgo +Original source: http://github.com/mikejs/python-duckduckgo (outdated) + +This version has been forked from the original to handle some new features of +the API, and switch from XML to JSON. 
Installation ============ @@ -19,30 +26,50 @@ Usage ===== >>> import duckduckgo - >>> r = duckduckgo.query('Duck Duck Go') + >>> r = duckduckgo.query('DuckDuckGo') >>> r.type - 'answer' + u'answer' >>> r.results[0].text - 'Official site' + u'Official site' >>> r.results[0].url - 'http://duckduckgo.com/' + u'http://duckduckgo.com/' >>> r.abstract.url - 'http://en.wikipedia.org/wiki/Duck_Duck_Go' + u'http://en.wikipedia.org/wiki/Duck_Duck_Go' >>> r.abstract.source - 'Wikipedia' + u'Wikipedia' >>> r = duckduckgo.query('Python') >>> r.type - 'disambiguation' - >>> r.related[6].text - 'Python (programming language), a computer programming language' - >>> r.related[6].url - 'http://duckduckgo.com/Python_(programming_language)' + u'disambiguation' + >>> r.related[1].text + u'Python (programming language), a computer programming language' + >>> r.related[1].url + u'http://duckduckgo.com/Python_(programming_language)' + >>> r.related[7].topics[0].text # weird, but this is how the DDG API is currently organized + u'Armstrong Siddeley Python, an early turboprop engine' + >>> r = duckduckgo.query('1 + 1') >>> r.type - 'nothing' + u'nothing' >>> r.answer.text - '1 + 1 = 2' + u'1 + 1 = 2' >>> r.answer.type - 'calc' + u'calc' + + >>> print duckduckgo.query('19301', kad='es_ES').answer.text + 19301 es un código postal de Paoli, PA + >>> print duckduckgo.query('how to spell test', html=True).answer.text + Test appears to be spelled right!
# duckduckgo.py - Library for querying the DuckDuckGo API
#
# Copyright (c) 2010 Michael Stephens
# Copyright (c) 2012-2013 Michael Smith
#
# See LICENSE for terms of usage, modification and redistribution.
#
# Quick reference (carried over from README.rst):
#
#   >>> print duckduckgo.query('how to spell test', html=True).answer.text
#   Test appears to be spelled right!
#   Suggestions: test, testy, teat, tests, rest, yest.
#
# The easiest method of quickly grabbing the best (hopefully) API result is
# to use duckduckgo.get_zci:
#
#   >>> print duckduckgo.get_zci('foo')
#   The terms foobar, fubar, or foo, bar, baz and qux are sometimes used as
#   placeholder names ... (https://en.wikipedia.org/wiki/Foobar)
#   >>> print duckduckgo.get_zci('foo fighters site')
#   http://www.foofighters.com/us/home
#
# Special keyword args for query():
#   - useragent  - string, the useragent used to make API calls. This is
#                  somewhat irrelevant, as they are not logged or used on
#                  DuckDuckGo, but it is retained for backwards compatibility.
#   - safesearch - boolean, enable or disable safesearch.
#   - html       - boolean, allow HTML in responses?

import json as j
import sys
import urllib

try:
    # Python 2 layout.
    import urllib2
    from urllib import urlencode
except ImportError:
    # Python 3 moved Request/urlopen and urlencode into urllib submodules.
    import urllib.request as urllib2
    from urllib.parse import urlencode

__version__ = 0.242


def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=True, html=False, meanings=True, **kwargs):
    """
    Query DuckDuckGo, returning a Results object.

    Here's a query that's unlikely to change:

    >>> result = query('1 + 1')
    >>> result.type
    'nothing'
    >>> result.answer.text
    '1 + 1 = 2'
    >>> result.answer.type
    'calc'

    Keyword arguments:
    useragent: User-Agent header sent with the request.
               Default: "python-duckduckgo " + str(__version__) (str)
    safesearch: True for on, False for off. Default: True (bool)
    html: True to allow HTML in output. Default: False (bool)
    meanings: True to include disambiguations in results.
              Default: True (bool)
    Any other keyword arguments are passed directly to DuckDuckGo as URL
    params, and override the ones built here when the names collide.

    Errors raised by urlopen() or by the JSON decoder propagate to the caller.
    """
    params = {
        'q': query,
        'o': 'json',
        'kp': '1' if safesearch else '-1',
        'no_redirect': '1',
        # The API flags are negated relative to our arguments.
        'no_html': '0' if html else '1',
        'd': '0' if meanings else '1',
    }
    params.update(kwargs)
    url = 'http://api.duckduckgo.com/?' + urlencode(params)

    request = urllib2.Request(url, headers={'User-Agent': useragent})
    response = urllib2.urlopen(request)
    try:
        raw = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()

    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    return Results(j.loads(raw))


class Results(object):
    """Top-level wrapper around one decoded API response (a dict).

    Attributes: type, json (the raw dict), api_version (always None, kept for
    compatibility), heading, results, related, abstract, redirect,
    definition, answer and image.
    """

    # One-letter 'Type' codes used by the API mapped to readable names.
    _TYPES = {'A': 'answer', 'D': 'disambiguation',
              'C': 'category', 'N': 'name',
              'E': 'exclusive', '': 'nothing'}

    def __init__(self, json):
        # A missing 'Type' maps to 'nothing'; an unrecognised code maps to ''.
        self.type = self._TYPES.get(json.get('Type', ''), '')

        self.json = json
        self.api_version = None  # compat

        self.heading = json.get('Heading', '')

        self.results = [Result(elem) for elem in json.get('Results', [])]
        self.related = [Result(elem) for elem in
                        json.get('RelatedTopics', [])]

        self.abstract = Abstract(json)
        self.redirect = Redirect(json)
        self.definition = Definition(json)
        self.answer = Answer(json)

        # The top-level image is a bare value; wrap it so Image can read it.
        self.image = Image({'Result': json.get('Image', '')})


class Abstract(object):
    """The Abstract* fields of a response: html, text, url and source."""

    def __init__(self, json):
        self.html = json.get('Abstract', '')
        self.text = json.get('AbstractText', '')
        self.url = json.get('AbstractURL', '')
        self.source = json.get('AbstractSource')


class Redirect(object):
    """The 'Redirect' field of a response ('' when absent)."""

    def __init__(self, json):
        self.url = json.get('Redirect', '')


class Result(object):
    """One entry of 'Results' or 'RelatedTopics'.

    An entry is either a leaf (html, text, url and icon are filled in and
    topics is empty) or a group carrying a 'Topics' list (topics holds the
    nested Result objects and the leaf attributes are None). Every attribute
    is always present, so callers can read .text or .url without checking
    which kind of entry they hold.
    """

    def __init__(self, json):
        self.html = None
        self.text = None
        self.url = None
        self.icon = None
        # Value of the entry's 'Name' key, or None when it has none.
        self.name = json.get('Name')

        self.topics = [Result(t) for t in (json.get('Topics') or [])]
        if self.topics:
            return

        self.html = json.get('Result')
        self.text = json.get('Text')
        self.url = json.get('FirstURL')

        icon_json = json.get('Icon')
        if icon_json is not None:
            self.icon = Image(icon_json)


class Image(object):
    """An image reference: url, height and width (None when not supplied)."""

    def __init__(self, json):
        # Results wraps the top-level image under 'Result'; icon objects
        # returned by the API carry their address under 'URL' instead.
        self.url = json.get('Result')
        if self.url is None:
            self.url = json.get('URL')
        self.height = json.get('Height', None)
        self.width = json.get('Width', None)


class Answer(object):
    """The 'Answer' and 'AnswerType' fields of a response."""

    def __init__(self, json):
        self.text = json.get('Answer')
        self.type = json.get('AnswerType', '')


class Definition(object):
    """The Definition* fields of a response: text, url and source."""

    def __init__(self, json):
        self.text = json.get('Definition', '')
        self.url = json.get('DefinitionURL')
        self.source = json.get('DefinitionSource')
def get_zci(q, web_fallback=True, priority=('answer', 'abstract', 'related.0', 'definition'), urls=True, **kwargs):
    """A helper method to get a single (and hopefully the best) ZCI result.

    q: the query string.
    web_fallback: when True and none of the priority fields produced text,
        fall back to the redirect URL of the response (the first web result).
    priority: the order in which fields of the Results object are checked
        for an answer. An entry of the form 'name.N' takes item N of the
        list-valued field 'name'.
    urls: when True, append ' (<url>)' to the text if the chosen field has a
        non-empty url.
    Any other keyword arguments are passed to query().

    Returns the chosen text, or 'Sorry, no results.' if nothing was found.
    Raises TypeError when an indexed priority entry names a field that is
    not indexable.
    """
    # NOTE(review): the leading backslash is presumably DuckDuckGo's
    # "jump to first result" syntax, which is what would populate the
    # Redirect field used by web_fallback -- confirm against the API docs.
    ddg = query('\\' + q, **kwargs)
    response = ''

    for entry in priority:
        parts = entry.split('.')
        field = parts[0]
        index = int(parts[1]) if len(parts) > 1 else None

        result = getattr(ddg, field)
        if index is not None:
            if not hasattr(result, '__getitem__'):
                raise TypeError('%s field is not indexable' % field)
            result = result[index] if len(result) > index else None
        if not result:
            continue

        # Grouped related topics may carry no text/url of their own, so read
        # both defensively instead of assuming the attributes exist.
        text = getattr(result, 'text', None)
        if not text:
            continue

        response = text
        url = getattr(result, 'url', None)
        if urls and url:
            response += ' (%s)' % url
        break

    # if there still isn't anything, try to get the first web result
    if not response and web_fallback and ddg.redirect.url:
        response = ddg.redirect.url

    # final fallback
    return response or 'Sorry, no results.'


def _write_line(text):
    """Write text plus a newline to stdout on both Python 2 and Python 3."""
    if not isinstance(text, str):
        # Python 2 unicode: encode explicitly so a piped stdout cannot fail.
        encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
        text = text.encode(encoding, 'replace')
    sys.stdout.write(text + '\n')


def main():
    """Command-line entry point: dump the raw JSON fields of one query.

    The words in sys.argv[1:] are joined with spaces into the query. Fields
    are printed in sorted key order: scalars as 'key: value', containers as
    the key followed by one tab-indented line per item. Without arguments a
    usage line is printed and no query is made.
    """
    if len(sys.argv) < 2:
        _write_line('Usage: %s [query]' % sys.argv[0])
        return

    results = query(' '.join(sys.argv[1:]))
    for key in sorted(results.json):
        value = results.json[key]
        if isinstance(value, (list, tuple, dict)):
            _write_line('%s' % key)
            for item in value:
                _write_line('\t %s' % (item,))
        else:
            # Anything that is not a container (text, numbers, booleans,
            # null) is shown inline.
            _write_line('%s: %s' % (key, value))


# NOTE: the same change set also edits setup.py: the distribution name
# becomes 'duckduckgo2', the description 'Library for querying the
# DuckDuckGo API', the author Michael Smith <crazedpsyc@duckduckgo.com>, and
# the project URL http://github.com/crazedpsyc/python-duckduckgo/.

if __name__ == '__main__':
    main()