Skip to content

Commit b3bad2e

Browse files
author
Keiron Pizzey
authored
Merge pull request #1 from sopython/dev
Dev
2 parents 1bee5bd + 366b5ff commit b3bad2e

File tree

9 files changed

+206
-25
lines changed

9 files changed

+206
-25
lines changed

HISTORY.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
History
33
=======
44

5+
0.2.0 (2016-07-08)
6+
------------------
7+
8+
* Implemented basic eridu logic to access SE.
9+
* Added CLI that will get content on a set schedule.
10+
511
0.1.0 (2016-07-07)
612
------------------
713

eridu/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,7 @@
22

33
__author__ = 'Keiron J. Pizzey'
44
__email__ = '[email protected]'
5-
__version__ = '0.1.0'
5+
__version__ = '0.2.0'
6+
7+
8+
from eridu.core import get_post_ids, split_post_ids, get_questions, get_answers, filter_posts_by_tag

eridu/cli.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,60 @@
11
# -*- coding: utf-8 -*-
22

3+
import time
4+
35
import click
6+
import schedule
7+
8+
from eridu.logger import logger
9+
from eridu.config import FILTER_TAGS, SECONDS_BETWEEN_REQUESTS
10+
from eridu.core import get_post_ids, split_post_ids, get_questions, get_answers, filter_posts_by_tag
411

512
@click.command()
@click.option('--start', default=1, help="Starting page for historic results.")
def main(start):
    """Console script for eridu.

    Walks the site's post listing one page per scheduled tick, beginning at
    ``start``, and prints every question and answer that carries at least
    one of the tags listed in ``FILTER_TAGS``.  The loop runs until the
    process is interrupted.

    :param start: first page of the post listing to fetch (1-based).
    """
    logger.info('Starting main function in Eridu service.')

    # Stack Exchange paging is 1-based, so never ask for page 0 or below.
    page = max(start or 1, 1)
    params = {'page': page}
    # Drop empty entries so a stray or trailing comma does not create a '' tag.
    tags = [s.strip() for s in FILTER_TAGS.split(',') if s.strip()]
    # The interval is a string when it comes from the environment; the
    # scheduler needs a number.
    interval = int(SECONDS_BETWEEN_REQUESTS)

    logger.info('Starting on page {}'.format(page))
    logger.info('Filtering to include the following tags: {}.'.format(tags))

    def show(posts):
        """Print each post, logging (not hiding) any the console cannot encode."""
        for post in posts:
            try:
                print(post)
            except UnicodeEncodeError:
                logger.warning('Skipping a post that cannot be encoded for this console.')

    def run(params):
        """Fetch, filter and print one page, then advance ``params['page']``."""
        page = params.get('page')
        logger.info('Getting posts for page {}.'.format(page))

        post_ids = get_post_ids(page)
        ids = split_post_ids(post_ids['items'])

        questions = get_questions(ids['question_ids'])
        show(filter_posts_by_tag(questions['items'], tags))

        print('\n\n')

        answers = get_answers(ids['answer_ids'])
        show(filter_posts_by_tag(answers['items'], tags))

        # The dict is shared with the scheduler, so the next tick sees the
        # advanced page number.
        params['page'] += 1

    schedule.every(interval).seconds.do(run, params=params)

    while True:
        schedule.run_pending()
        time.sleep(1)
1158

1259

1360
if __name__ == "__main__":

eridu/config.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import os
2+
3+
4+
# Runtime configuration for eridu.  Every value except the endpoint URLs can
# be overridden through an environment variable; the second argument to
# ``os.environ.get`` is the default used when the variable is unset.

# API key sent as the ``key`` request parameter; None when the variable is unset.
ACCESS_KEY = os.environ.get('STACKEXCHANGE_REQUESTS_KEY')
SITE = os.environ.get('ERIDU_SITE', 'stackoverflow')

# --- Post id listing -------------------------------------------------------
POST_IDS_URL = "https://api.stackexchange.com/2.2/posts"
# Environment values are always strings, so coerce to int to give callers the
# same type whether or not the variable is set.
POST_IDS_NUMBER = int(os.environ.get('ERIDU_POST_IDS_NUMBER', 100))
POST_IDS_FILTER = os.environ.get('ERIDU_POST_IDS_FILTER', '!3tz1WbZW5IHcz*twZ')
POST_IDS_SORT = os.environ.get('ERIDU_POST_IDS_SORT', 'creation')
POST_IDS_ORDER = os.environ.get('ERIDU_POST_IDS_ORDER', 'asc')

# --- Questions -------------------------------------------------------------
# ``{}`` is replaced with a ';'-separated list of question ids.
QUESTIONS_URL = "https://api.stackexchange.com/2.2/questions/{}"
QUESTIONS_FILTER = os.environ.get('ERIDU_QUESTIONS_FILTER', '!OfZM.T7F9gRuLlvhzHoyC1Fyd3oEOAMszsZJXvHk4mw')
QUESTIONS_SORT = os.environ.get('ERIDU_QUESTIONS_SORT', 'creation')
QUESTIONS_ORDER = os.environ.get('ERIDU_QUESTIONS_ORDER', 'asc')

# --- Answers ---------------------------------------------------------------
# ``{}`` is replaced with a ';'-separated list of answer ids.
ANSWERS_URL = "https://api.stackexchange.com/2.2/answers/{}"
ANSWERS_FILTER = os.environ.get('ERIDU_ANSWERS_FILTER', '!Fcazzsr2b3Mo6cWaRk)J*C-n25')
ANSWERS_SORT = os.environ.get('ERIDU_ANSWERS_SORT', 'creation')
ANSWERS_ORDER = os.environ.get('ERIDU_ANSWERS_ORDER', 'asc')

# Comma-separated list of tags a post must carry (at least one) to be kept.
FILTER_TAGS = os.environ.get('ERIDU_FILTER_TAGS', 'python,python-2.x,python-3.x')
# Used as a scheduler interval, so it must be a number rather than the raw
# environment string.
SECONDS_BETWEEN_REQUESTS = int(os.environ.get('ERIDU_SECONDS_BETWEEN_REQUESTS', 300))

eridu/core.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import time
2+
3+
import requests
4+
5+
from eridu.logger import logger
6+
from eridu.config import ACCESS_KEY, SITE
7+
from eridu.config import POST_IDS_URL, POST_IDS_FILTER, POST_IDS_NUMBER, POST_IDS_SORT, POST_IDS_ORDER
8+
from eridu.config import QUESTIONS_URL, QUESTIONS_FILTER, QUESTIONS_SORT, QUESTIONS_ORDER
9+
from eridu.config import ANSWERS_URL, ANSWERS_FILTER, ANSWERS_SORT, ANSWERS_ORDER
10+
11+
12+
13+
def get_questions(question_ids, url=QUESTIONS_URL, filter=QUESTIONS_FILTER, access_key=ACCESS_KEY, site=SITE, sort=QUESTIONS_SORT, order=QUESTIONS_ORDER):
    """Fetch the questions with the given ids.

    :param question_ids: iterable of question ids; each is converted with ``str``.
    :param url: URL template whose ``{}`` receives the ';'-joined ids.
    :param filter: value sent as the ``filter`` request parameter.
    :param access_key: value sent as the ``key`` request parameter.
    :param site: value sent as the ``site`` request parameter.
    :param sort: value sent as the ``sort`` request parameter.
    :param order: value sent as the ``order`` request parameter.
    :returns: the decoded JSON response as a dict, or ``{'items': []}`` when
        no ids were given.
    """
    ids = [str(i) for i in question_ids]
    if not ids:
        # With no ids the formatted URL would have an empty id segment and
        # would no longer address specific questions, so skip the request.
        logger.info('No question ids given; skipping questions request.')
        return {'items': []}

    url = url.format(';'.join(ids))

    payload = {
        "pagesize": 100,
        "key": access_key,
        "site": site,
        "sort": sort,
        "order": order,
        "filter": filter,
    }

    logger.info('Getting questions with payload: {}'.format(payload))

    r = requests.get(url, params=payload)

    data = r.json()

    # Wait out the delay the response asks for before returning, so the
    # caller cannot immediately issue another request.
    backoff = data.get('backoff')
    if backoff is not None:
        time.sleep(int(backoff))

    return data
35+
36+
def get_answers(answer_ids, url=ANSWERS_URL, filter=ANSWERS_FILTER, access_key=ACCESS_KEY, site=SITE, sort=ANSWERS_SORT, order=ANSWERS_ORDER):
    """Fetch the answers with the given ids and attach their questions' tags.

    Each returned answer gets a ``tags`` key copied from its parent question
    (looked up with ``get_questions``); an answer whose parent question is
    not in that lookup gets an empty list.

    :param answer_ids: iterable of answer ids; each is converted with ``str``.
    :param url: URL template whose ``{}`` receives the ';'-joined ids.
    :param filter: value sent as the ``filter`` request parameter.
    :param access_key: value sent as the ``key`` request parameter.
    :param site: value sent as the ``site`` request parameter.
    :param sort: value sent as the ``sort`` request parameter.
    :param order: value sent as the ``order`` request parameter.
    :returns: the decoded JSON response as a dict, or ``{'items': []}`` when
        no ids were given.
    """
    ids = [str(i) for i in answer_ids]
    if not ids:
        # With no ids the formatted URL would have an empty id segment and
        # would no longer address specific answers, so skip the request.
        logger.info('No answer ids given; skipping answers request.')
        return {'items': []}

    url = url.format(';'.join(ids))

    payload = {
        "pagesize": 100,
        "key": access_key,
        "site": site,
        "sort": sort,
        "order": order,
        "filter": filter,
    }

    logger.info('Getting answers with payload: {}'.format(payload))

    r = requests.get(url, params=payload)

    data = r.json()

    items = data.get('items', [])

    # Several answers can share one question; ask for each question once.
    question_ids = sorted(set(answer['question_id'] for answer in items))

    tags = {}
    if question_ids:
        questions = get_questions(question_ids)
        tags = {question['question_id']: question['tags'] for question in questions.get('items', [])}

    for answer in items:
        # A parent question may be absent from the lookup; fall back to no
        # tags rather than failing the whole page.
        answer['tags'] = tags.get(answer['question_id'], [])

    backoff = data.get('backoff')
    if backoff is not None:
        time.sleep(int(backoff))

    return data
67+
68+
69+
def get_post_ids(page, url=POST_IDS_URL, filter=POST_IDS_FILTER, n_posts=POST_IDS_NUMBER, access_key=ACCESS_KEY, site=SITE, sort=POST_IDS_SORT, order=POST_IDS_ORDER):
    """Request one page of the post listing and return the decoded JSON.

    ``page`` and ``n_posts`` are sent as the ``page`` and ``pagesize``
    request parameters; the remaining keyword arguments are sent under the
    parameter names ``key``, ``site``, ``sort``, ``order`` and ``filter``.
    If the response carries a ``backoff`` value, the call sleeps for that
    many seconds before returning.
    """
    query = {
        "pagesize": n_posts,
        "page": page,
        "key": access_key,
        "site": site,
        "sort": sort,
        "order": order,
        "filter": filter,
    }
    logger.info('Getting post ids with payload: {}'.format(query))

    response = requests.get(url, params=query)
    body = response.json()

    wait = body.get('backoff')
    if wait is not None:
        time.sleep(int(wait))

    return body
89+
90+
91+
def split_post_ids(post_ids):
    """Partition post records into question ids and answer ids.

    Each record's ``post_type`` decides which list its ``post_id`` goes to;
    records of any other type are ignored.  Returns a dict with the keys
    ``question_ids`` and ``answer_ids``.
    """
    logger.info('Splitting post ids into question and answer ids')

    questions = []
    answers = []

    for post in post_ids:
        kind = post['post_type']
        if kind == "question":
            target = questions
        elif kind == 'answer':
            target = answers
        else:
            continue
        target.append(post['post_id'])

    return {"question_ids": questions, "answer_ids": answers}
106+
107+
def filter_posts_by_tag(posts, tags):
    """Return the posts that carry at least one of the wanted tags.

    :param posts: iterable of post dicts; a post's tags are read from its
        ``tags`` key.  A post with no ``tags`` key, or with ``tags`` set to
        ``None``, is treated as untagged and left out.
    :param tags: iterable of tag names, or a single tag name as a string.
    :returns: list of the matching posts, in their original order.
    """
    if isinstance(tags, str):
        # set('python') would be a set of single characters, not one tag.
        tags = [tags]
    wanted = set(tags)
    return [post for post in posts if wanted.intersection(post.get('tags') or ())]

eridu/eridu.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

eridu/logger.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import logging
2+
3+
# Package-wide logger: INFO and above, written through a stream handler
# (stderr unless a stream is given) with a timestamped line format.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

ch = logging.StreamHandler()
ch.setFormatter(formatter)

logger = logging.getLogger('eridu')
logger.setLevel(logging.INFO)
logger.addHandler(ch)

setup.cfg

Lines changed: 0 additions & 18 deletions
This file was deleted.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
setup(
2222
name='sopython-eridu',
23-
version='0.1.0',
23+
version='0.2.0',
2424
description="Project Cradle library for accessing historic SO content.",
2525
long_description=readme + '\n\n' + history,
2626
author="Keiron J. Pizzey",

0 commit comments

Comments
 (0)