Skip to content

Commit 7b9c51a

Browse files
committed
print
1 parent c1b6ba5 commit 7b9c51a

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

not-archived.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,15 @@ def main():
3838
limit = 500
3939
wikis = []
4040
while True:
41+
# Query does not retrieve wikifarm wikis; fix it? https://wikiapiary.com/wiki/Reiser4_FS_Wiki
4142
url = 'https://wikiapiary.com/wiki/Special:Ask/-5B-5BCategory:Website-20not-20archived-5D-5D-20-5B-5BIs-20defunct::False-5D-5D-20-5B-5BIs-20in-20farm::False-5D-5D/-3F%%3DWiki-23/-3FHas-20API-20URL%%3DAPI/-3FHas-20pages-20count%%3DPages/-3FHas-20images-20count%%3DImages/format%%3Dtable/limit%%3D%d/link%%3Dall/sort%%3DHas-20pages-20count,Has-20images-20count/order%%3Dasc/mainlabel%%3DWiki/searchlabel%%3D%%E2%%80%%A6-20further-20results/offset%%3D%d' % (limit, offset)
4243
f = urllib.urlopen(url)
4344
raw = f.read()
4445
m = re.findall(ur'(?im)<tr class="row-(?:odd|even)"><td class="[^<>]+?"><a href="/wiki/[^<>]+?" title="[^<>]+?">([^<>]+?)</a></td><td class="[^<>]+?"><a href="/wiki/[^<>]+?" title="[^<>]+?">[^<>]+?</a></td><td class="[^<>]+?"><a class="external" rel="nofollow" href="([^<>]+?)">[^<>]+?</a></td><td data-sort-value="([^<>]+?)" class="[^<>]+?">[^<>]+?</td><td data-sort-value="([^<>]+?)" class="[^<>]+?">[^<>]+?</td></tr>', raw)
4546
for i in m:
4647
domain = getdomain(i[1])
4748
if domain not in donewikis and not domain.endswith('editthis.info') and not domain.endswith('wiki-site.com'):
48-
print i[0], i[1], i[2], i[3]
49+
print i[1], i[2], i[3], i[0]
4950

5051
if not re.search(ur'rel="nofollow">Next</a>', raw):
5152
break

0 commit comments

Comments
 (0)