Skip to content

Commit ed93ce9

Browse files
committed
Update fetch_repo_info to use the implementations JSON, Python 3, and an Authorization header
1. Access implementations list from JSON file, not from HTML. 2. Port from Python 2 to Python 3. 3. Use Authorization header to authenticate with GitHub instead of deprecated query parameter.
1 parent 7cab177 commit ed93ce9

File tree

1 file changed

+28
-25
lines changed

1 file changed

+28
-25
lines changed

fetch_repo_info.py

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1-
#!/usr/bin/env python2
1+
#!/usr/bin/env python3
22

33
# This script is under the public domain.
44

5-
from bs4 import BeautifulSoup
5+
import base64
66
import json
77
import os
88
import re
99
import time
10-
import urllib2
10+
from urllib.error import URLError
11+
from urllib.request import urlopen, Request
1112

1213
"""
1314
Possibly interesting fields:
@@ -25,18 +26,24 @@
2526
def get_repo(repo_id):
2627
if repo_id is None:
2728
return None
28-
api_url = 'https://api.github.com/repos/%s' % repo_id
29+
headers = {}
30+
url = 'https://api.github.com/repos/%s' % repo_id
31+
32+
# Expected format: username:hexadecimalpersonalaccesstoken.
2933
try:
30-
api_url2 = '%s?access_token=%s' % (api_url, os.environ['GITHUB_TOKEN'])
34+
credentials = os.environ['GITHUB_TOKEN']
3135
except KeyError:
32-
api_url2 = api_url
36+
pass
37+
else:
38+
credentials = base64.b64encode(bytes(credentials, 'utf-8'))
39+
headers['Authorization'] = b'Basic ' + credentials
3340

3441
try:
35-
response = urllib2.urlopen(api_url2)
36-
except urllib2.HTTPError, e:
37-
print 'Warning: URL %s returned status %d' % (api_url, e.code)
42+
response = urlopen(Request(url, headers=headers))
43+
except URLError as e:
44+
print(f'Warning: Fetching {url} failed: {e.reason}')
3845
try:
39-
print json.load(e)
46+
print(json.load(e))
4047
except:
4148
pass
4249
return None
@@ -55,31 +62,26 @@ def get_repo_score(repo):
5562
def repo_url_to_id(url):
    """Extract the 'owner/repo' id from a GitHub repository URL.

    Returns None when url is None or does not look like an https GitHub
    repository URL (only https is accepted, matching the committed code).
    """
    if url is None:
        return None
    # Escape the dot so 'github.com' matches literally instead of
    # 'github' + any single character + 'com'.
    m = re.match(r'https://github\.com/([^/#]+/[^/#]+)/?', url)
    if m is None:
        return None
    else:
        return m.group(1)
6370

64-
def get_all_urls():
65-
response = urllib2.urlopen('http://raft.github.io')
66-
content = BeautifulSoup(response)
67-
urls = [link.get('href') for link in content.find_all('a')]
68-
return list(set(urls))
69-
7071
def get_all_repos():
    """Fetch the Raft implementations list and return (url, repo) pairs.

    Downloads implementations.json from raft.github.io, looks up each
    entry's GitHub repository via get_repo, and drops entries whose
    repository metadata could not be fetched.
    """
    listing = urlopen('https://raft.github.io/implementations.json')
    pairs = []
    for impl in json.load(listing):
        repo_url = impl['repoURL']
        repo = get_repo(repo_url_to_id(repo_url))
        if repo is not None:
            pairs.append((repo_url, repo))
    return pairs
7881

79-
def rank(repos, sort_key, result_key):
    """Store each repo's 0-based rank under sort_key into repo[result_key].

    repos is a list of (url, repo_dict) pairs; each repo_dict is mutated
    in place. Ties keep their input order (sorted() is stable).
    """
    ordered = sorted(repos, key=lambda pair: sort_key(pair[1]))
    for position, (_url, repo) in enumerate(ordered):
        repo[result_key] = position
8486

8587
def main(filename='repos.jsonp'):
@@ -90,9 +92,9 @@ def main(filename='repos.jsonp'):
9092
rank(repos,
9193
sort_key=lambda repo: repo.get('updated_at', '1970-01-01T00:00:00Z'),
9294
result_key='updated_rank')
93-
for url, repo in repos:
95+
for _url, repo in repos:
9496
repo['rank'] = repo['stars_rank'] + repo['updated_rank']
95-
repos.sort(key=lambda (url, repo): repo['rank'], reverse=True)
97+
repos.sort(key=lambda repo: repo[1]['rank'], reverse=True)
9698
f = open(filename, 'w')
9799
f.write('var raft_repos = function() {\n')
98100
f.write('return ')
@@ -101,7 +103,8 @@ def main(filename='repos.jsonp'):
101103
'stars': repo['stargazers_count'],
102104
'updated': repo['updated_at']})
103105
for (url, repo) in repos]),
104-
f)
106+
f,
107+
indent=4)
105108
f.write(';\n')
106109
f.write('};\n')
107110

0 commit comments

Comments
 (0)