Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API Updates #1

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
BSD-style license
=================

Copyright (c) 2010, Michael Stephens
Copyright (c) 2010 Michael Stephens <[email protected]>
Copyright (c) 2012-2013 Michael Smith <[email protected]>

All rights reserved.

Expand Down
61 changes: 44 additions & 17 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,18 @@
python-duckduckgo
==================

A Python library for querying the Duck Duck Go API.
A Python library for querying the DuckDuckGo API.

Copyright Michael Stephens <[email protected]>, released under a BSD-style license.
Copyright (c) 2010 Michael Stephens <[email protected]>
Copyright (c) 2012-2013 Michael Smith <[email protected]>

Source: http://github.com/mikejs/python-duckduckgo
Released under a 3-clause BSD license, see LICENSE for details.

Latest Source: http://github.com/crazedpsyc/python-duckduckgo
Original source: http://github.com/mikejs/python-duckduckgo (outdated)

This version has been forked from the original to handle some new features of
the API, and switch from XML to JSON.

Installation
============
Expand All @@ -19,30 +26,50 @@ Usage
=====

>>> import duckduckgo
>>> r = duckduckgo.query('Duck Duck Go')
>>> r = duckduckgo.query('DuckDuckGo')
>>> r.type
'answer'
u'answer'
>>> r.results[0].text
'Official site'
u'Official site'
>>> r.results[0].url
'http://duckduckgo.com/'
u'http://duckduckgo.com/'
>>> r.abstract.url
'http://en.wikipedia.org/wiki/Duck_Duck_Go'
u'http://en.wikipedia.org/wiki/Duck_Duck_Go'
>>> r.abstract.source
'Wikipedia'
u'Wikipedia'

>>> r = duckduckgo.query('Python')
>>> r.type
'disambiguation'
>>> r.related[6].text
'Python (programming language), a computer programming language'
>>> r.related[6].url
'http://duckduckgo.com/Python_(programming_language)'
u'disambiguation'
>>> r.related[1].text
u'Python (programming language), a computer programming language'
>>> r.related[1].url
u'http://duckduckgo.com/Python_(programming_language)'
>>> r.related[7].topics[0].text # weird, but this is how the DDG API is currently organized
u'Armstrong Siddeley Python, an early turboprop engine'


>>> r = duckduckgo.query('1 + 1')
>>> r.type
'nothing'
u'nothing'
>>> r.answer.text
'1 + 1 = 2'
u'1 + 1 = 2'
>>> r.answer.type
'calc'
u'calc'

>>> print duckduckgo.query('19301', kad='es_ES').answer.text
19301 es un código postal de Paoli, PA
>>> print duckduckgo.query('how to spell test', html=True).answer.text
<b>Test</b> appears to be spelled right!<br/><i>Suggestions: </i>test, testy, teat, tests, rest, yest.

The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.get_zci::
>>> print duckduckgo.get_zci('foo')
The terms foobar /ˈfʊːbɑːr/, fubar, or foo, bar, baz and qux are sometimes used as placeholder names in computer programming or computer-related documentation. (https://en.wikipedia.org/wiki/Foobar)
>>> print duckduckgo.get_zci('foo fighters site')
http://www.foofighters.com/us/home

Special keyword args for query():
- useragent - string, the User-Agent sent with API calls. This is somewhat irrelevant, as it is not logged or used by DuckDuckGo, but it is retained for backwards compatibility.
- safesearch - boolean, enable or disable safesearch.
- html - boolean, Allow HTML in responses?

240 changes: 131 additions & 109 deletions duckduckgo.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
#!/usr/bin/env python
# duckduckgo.py - Library for querying the DuckDuckGo API
#
# Copyright (c) 2010 Michael Stephens <[email protected]>
# Copyright (c) 2012-2013 Michael Smith <[email protected]>
#
# See LICENSE for terms of usage, modification and redistribution.

import urllib
import urllib2
from xml.etree import ElementTree
import json as j
import sys

__version__ = 0.1
__version__ = 0.242


def query(query, useragent='python-duckduckgo 0.1'):
def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=True, html=False, meanings=True, **kwargs):
"""
Query Duck Duck Go, returning a Results object.
Query DuckDuckGo, returning a Results object.

Here's a query that's unlikely to change:

Expand All @@ -19,146 +26,161 @@ def query(query, useragent='python-duckduckgo 0.1'):
'1 + 1 = 2'
>>> result.answer.type
'calc'
"""
params = urllib.urlencode({'q': query, 'o': 'x'})
url = 'http://duckduckgo.com/?' + params

Keyword arguments:
useragent: UserAgent to use while querying. Default: "python-duckduckgo %d" (str)
safesearch: True for on, False for off. Default: True (bool)
html: True to allow HTML in output. Default: False (bool)
meanings: True to include disambiguations in results (bool)
Any other keyword arguments are passed directly to DuckDuckGo as URL params.
""" % __version__

safesearch = '1' if safesearch else '-1'
html = '0' if html else '1'
meanings = '0' if meanings else '1'
params = {
'q': query,
'o': 'json',
'kp': safesearch,
'no_redirect': '1',
'no_html': html,
'd': meanings,
}
params.update(kwargs)
encparams = urllib.urlencode(params)
url = 'http://api.duckduckgo.com/?' + encparams

request = urllib2.Request(url, headers={'User-Agent': useragent})
response = urllib2.urlopen(request)
xml = ElementTree.fromstring(response.read())
json = j.loads(response.read())
response.close()

return Results(xml)
return Results(json)


class Results(object):

def __init__(self, xml):
def __init__(self, json):
self.type = {'A': 'answer', 'D': 'disambiguation',
'C': 'category', 'N': 'name',
'E': 'exclusive', '': 'nothing'}[xml.findtext('Type', '')]
'E': 'exclusive', '': 'nothing'}.get(json.get('Type',''), '')

self.api_version = xml.attrib.get('version', None)
self.json = json
self.api_version = None # compat

self.heading = xml.findtext('Heading', '')
self.heading = json.get('Heading', '')

self.results = [Result(elem) for elem in xml.getiterator('Result')]
self.results = [Result(elem) for elem in json.get('Results',[])]
self.related = [Result(elem) for elem in
xml.getiterator('RelatedTopic')]
json.get('RelatedTopics',[])]

self.abstract = Abstract(xml)

answer_xml = xml.find('Answer')
if answer_xml is not None:
self.answer = Answer(answer_xml)
if not self.answer.text:
self.answer = None
else:
self.answer = None
self.abstract = Abstract(json)
self.redirect = Redirect(json)
self.definition = Definition(json)
self.answer = Answer(json)

image_xml = xml.find('Image')
if image_xml is not None and image_xml.text:
self.image = Image(image_xml)
else:
self.image = None
self.image = Image({'Result':json.get('Image','')})


class Abstract(object):

def __init__(self, xml):
self.html = xml.findtext('Abstract', '')
self.text = xml.findtext('AbstractText', '')
self.url = xml.findtext('AbstractURL', '')
self.source = xml.findtext('AbstractSource')
def __init__(self, json):
self.html = json.get('Abstract', '')
self.text = json.get('AbstractText', '')
self.url = json.get('AbstractURL', '')
self.source = json.get('AbstractSource')

class Redirect(object):

class Result(object):
def __init__(self, json):
self.url = json.get('Redirect', '')

def __init__(self, xml):
self.html = xml.text
self.text = xml.findtext('Text')
self.url = xml.findtext('FirstURL')
class Result(object):

icon_xml = xml.find('Icon')
if icon_xml is not None:
self.icon = Image(icon_xml)
def __init__(self, json):
self.topics = json.get('Topics', [])
if self.topics:
self.topics = [Result(t) for t in self.topics]
return
self.html = json.get('Result')
self.text = json.get('Text')
self.url = json.get('FirstURL')

icon_json = json.get('Icon')
if icon_json is not None:
self.icon = Image(icon_json)
else:
self.icon = None


class Image(object):

def __init__(self, xml):
self.url = xml.text
self.height = xml.attrib.get('height', None)
self.width = xml.attrib.get('width', None)
def __init__(self, json):
self.url = json.get('Result')
self.height = json.get('Height', None)
self.width = json.get('Width', None)


class Answer(object):

def __init__(self, xml):
self.text = xml.text
self.type = xml.attrib.get('type', '')
def __init__(self, json):
self.text = json.get('Answer')
self.type = json.get('AnswerType', '')

class Definition(object):
    """Definition fields taken from a decoded DuckDuckGo API response.

    Attributes:
        text: value of the 'Definition' key, or '' when the key is absent.
        url: value of the 'DefinitionURL' key, or None when absent.
        source: value of the 'DefinitionSource' key, or None when absent.
    """

    def __init__(self, json):
        # `json` is the decoded response mapping; only .get() is used on it.
        lookup = json.get
        self.text = lookup('Definition', '')
        self.url = lookup('DefinitionURL')
        self.source = lookup('DefinitionSource')

def main():
import sys
from optparse import OptionParser

parser = OptionParser(usage="usage: %prog [options] query",
version="ddg %s" % __version__)
parser.add_option("-o", "--open", dest="open", action="store_true",
help="open results in a browser")
parser.add_option("-n", dest="n", type="int", default=3,
help="number of results to show")
parser.add_option("-d", dest="d", type="int", default=None,
help="disambiguation choice")
(options, args) = parser.parse_args()
q = ' '.join(args)

if options.open:
import urllib
import webbrowser

webbrowser.open("http://duckduckgo.com/?%s" % urllib.urlencode(
dict(q=q)), new=2)

sys.exit(0)

results = query(q)

if options.d and results.type == 'disambiguation':
try:
related = results.related[options.d - 1]
except IndexError:
print "Invalid disambiguation number."
sys.exit(1)
results = query(related.url.split("/")[-1].replace("_", " "))

if results.answer and results.answer.text:
print "Answer: %s\n" % results.answer.text
elif results.abstract and results.abstract.text:
print "%s\n" % results.abstract.text

if results.type == 'disambiguation':
print ("'%s' can mean multiple things. You can re-run your query "
"and add '-d #' where '#' is the topic number you're "
"interested in.\n" % q)

for i, related in enumerate(results.related[0:options.n]):
name = related.url.split("/")[-1].replace("_", " ")
summary = related.text
if len(summary) < len(related.text):
summary += "..."
print '%d. %s: %s\n' % (i + 1, name, summary)
else:
for i, result in enumerate(results.results[0:options.n]):
summary = result.text[0:70].replace("&nbsp;", " ")
if len(summary) < len(result.text):
summary += "..."
print "%d. %s" % (i + 1, summary)
print " <%s>\n" % result.url

def get_zci(q, web_fallback=True, priority=['answer', 'abstract', 'related.0', 'definition'], urls=True, **kwargs):
'''A helper method to get a single (and hopefully the best) ZCI result.
priority=list can be used to set the order in which fields will be checked for answers.
Use web_fallback=True to fall back to grabbing the first web result.
Any other keyword arguments are passed to query. This method will fall back to 'Sorry, no results.'
if it cannot find anything.'''

if __name__ == '__main__':
main()
ddg = query('\\'+q, **kwargs)
response = ''

for p in priority:
ps = p.split('.')
type = ps[0]
index = int(ps[1]) if len(ps) > 1 else None

result = getattr(ddg, type)
if index is not None:
if not hasattr(result, '__getitem__'): raise TypeError('%s field is not indexable' % type)
result = result[index] if len(result) > index else None
if not result: continue

if result.text: response = result.text
if result.text and hasattr(result,'url') and urls:
if result.url: response += ' (%s)' % result.url
if response: break

# if there still isn't anything, try to get the first web result
if not response and web_fallback:
if ddg.redirect.url:
response = ddg.redirect.url

# final fallback
if not response:
response = 'Sorry, no results.'

return response

def main():
    """Command-line entry point: dump the raw API response for a query.

    The command-line arguments are joined with spaces into a single query
    string and passed to query().  Every top-level key of the decoded JSON
    response is then printed in sorted order:

    * list, tuple and dict values are printed as the key on its own line
      followed by one tab-indented line per element (for a dict, per key);
    * any other value (string, number, bool, None) is printed on the same
      line as ``key: value``.

    When no arguments are given, a usage line is printed instead and no
    request is made.
    """
    if len(sys.argv) < 2:
        print('Usage: %s [query]' % sys.argv[0])
        return

    response = query(' '.join(sys.argv[1:])).json

    # sorted() avoids relying on dict.keys() returning a sortable list.
    for key in sorted(response):
        value = response[key]
        # Decide by "is it a container" rather than by an exact scalar type
        # list, so None/float/bool values are printed instead of iterated.
        if isinstance(value, (list, tuple, dict)):
            print('%s' % (key,))
            for item in value:
                print('\t %s' % (item,))
        else:
            # Every print gets one pre-formatted string, so the output is the
            # same whether print is a statement or a function.
            print('%s: %s' % (key, value))
Loading