-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatalekt.py
73 lines (66 loc) · 2.08 KB
/
datalekt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from bs4 import BeautifulSoup
import datetime, time
import json, re
import sqlite3
from feedgen.feed import FeedGenerator
import hashlib
import xml.dom.minidom
import cloudscraper
# Endpoint and output locations used by the script.
FEED_URL = 'https://www.datalekt.nl/feeds/json.php'
CACHE_PATH = "datalekt.json"
RSS_PATH = "rss.xml"
REQUEST_TIMEOUT = 30  # seconds; keeps an unattended run from hanging forever


def fetch_items():
    """Download the datalekt.nl JSON feed and return its ``items`` list.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the JSON payload has no ``items`` key.
    """
    scraper = cloudscraper.create_scraper()
    response = scraper.post(FEED_URL, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error instead of a confusing JSON decode error
    # when the server returns an error page.
    response.raise_for_status()
    return response.json()['items']


def category_of(description):
    """Return the second ``"| "``-separated segment of *description*.

    The script treats ``content_text`` as ``"<organisation> | <category>"``
    (presumably the upstream format; verify against the live feed).
    Returns ``None`` when there is no separator, so one malformed item
    cannot abort the whole run with an ``IndexError``.
    """
    parts = description.split("| ")
    return parts[1] if len(parts) > 1 else None


def published_at(item):
    """Return the item's publication day as a UTC-aware ``datetime``.

    Only the first ten characters (``YYYY-MM-DD``) of ``date_published``
    are used; any time-of-day part is discarded.
    """
    day = item['date_published'][0:10]
    parsed = datetime.datetime.strptime(day, "%Y-%m-%d")
    return parsed.replace(tzinfo=datetime.timezone.utc)


def build_feed(items):
    """Build a ``FeedGenerator`` holding one entry per element of *items*.

    Entries are added in ascending ``date_published`` order.
    """
    feed = FeedGenerator()
    feed.id("12")
    feed.title("DataLekt RSS feed")
    feed.author({"name": "Gertje823"})
    feed.link(href="https://datalekt.nl", rel="alternate")
    feed.docs("https://github.com/Gertje823/datalekt-feed")
    feed.description("RSS feed of datalekt.nl")
    feed.language("nl")

    for item in sorted(items, key=lambda record: record["date_published"]):
        description = item['content_text']
        entry = feed.add_entry()
        entry.id(str(item['id']))
        entry.title(item['title'])
        entry.description(description)
        entry.link(href=item['url'])
        category = category_of(description)
        if category:
            # Only emit a category when the description actually has one.
            entry.category({'term': category})
        entry.pubDate(published_at(item))
    return feed


def cache_matches(path, serialized):
    """Return True when the file at *path* exists and equals *serialized*.

    A missing file counts as "changed" (returns False), so the very first
    run still generates the feed instead of exiting with "No changes".
    """
    try:
        with open(path, "r", encoding="utf-8") as cache:
            return cache.read() == serialized
    except FileNotFoundError:
        return False


def write_rss(feed, path):
    """Write *feed* to *path* as pretty-printed RSS."""
    pretty_xml = xml.dom.minidom.parseString(feed.rss_str()).toprettyxml()
    # toprettyxml() emits a declaration without an encoding, which XML
    # parsers read as UTF-8 -- so the file must be written as UTF-8.
    with open(path, "w", encoding="utf-8") as out:
        out.write(pretty_xml)


def main():
    """Fetch the feed and regenerate ``rss.xml`` when the items changed."""
    items = fetch_items()
    serialized = json.dumps(items, indent=4)

    if cache_matches(CACHE_PATH, serialized):
        # No changes
        print("No changes")
        return

    # Write the RSS first and the cache last: if RSS generation fails, the
    # stale cache makes the next run retry instead of reporting "No changes".
    write_rss(build_feed(items), RSS_PATH)
    with open(CACHE_PATH, "w", encoding="utf-8") as cache:
        cache.write(serialized)


if __name__ == "__main__":
    main()