1
+ #!/usr/bin/env python3
2
+ # coding=utf-8
3
+ __author__ = 'smallfly'
4
+
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+
8
+ NEWS_ADDRESS = "https://ielts.etest.net.cn/allnews"
9
+
10
class NewsItem:
    """One news entry, exposing ``date``, ``title`` and ``url`` as attributes.

    The three values are kept in a private dict and served through
    ``__getattr__``, so they can be read but are not stored as regular
    instance attributes.
    """

    def __init__(self, date, title, url):
        """Store the date, title and link of a single news entry."""
        self.__data = {
            "date": date,
            "title": title,
            "url": url,
        }

    def __repr__(self):
        """Return the repr of the underlying field dict."""
        return repr(self.__data)

    def __getattr__(self, item):
        """Resolve *item* from the stored fields.

        Python calls this only after normal attribute lookup has failed.

        Raises:
            AttributeError: if *item* is not one of the stored fields.
        """
        # Fetch the backing dict from the instance __dict__ instead of via
        # ``self.__data``: on an instance whose __init__ has not run (as
        # created by copy/pickle or ``__new__``), ``self.__data`` would
        # re-enter __getattr__ and recurse until RecursionError.
        data = self.__dict__.get("_NewsItem__data")
        if data is not None and item in data:
            return data[item]
        raise AttributeError("NewsItem has no attribute named {}.".format(item))
26
+
27
+
28
def get_raw_news(address, timeout=10):
    """Fetch *address* with an HTTP GET and return the body decoded as UTF-8.

    Args:
        address: URL of the page to download.
        timeout: Seconds to wait on the server before giving up; forwarded
            to ``requests.get``. Without it a stalled connection would block
            the caller indefinitely.

    Returns:
        The response text, or ``None`` when ``resp.ok`` is false.

    Raises:
        requests.RequestException: propagated unchanged from
            ``requests.get`` (e.g. connection failure or timeout).
    """
    resp = requests.get(address, timeout=timeout)
    if not resp.ok:
        return None
    # Decode as UTF-8 explicitly rather than trusting the guessed charset.
    resp.encoding = "UTF-8"
    return resp.text
35
+
36
def parse_news(content):
    """Extract the news entries from the HTML of the news listing page.

    Every ``<li class="main-sub-act-new">`` element is turned into one
    ``NewsItem``. The first child of the ``<li>`` is expected to wrap the
    ``<a>`` link and the second child to hold the publication date.

    Args:
        content: HTML markup of the listing page.

    Returns:
        A list of ``NewsItem`` objects in document order.
    """
    # Name the parser explicitly: letting BeautifulSoup pick one makes the
    # result depend on which parsers are installed and triggers a warning.
    soup = BeautifulSoup(content, "html.parser")
    news_items = []
    for li in soup.find_all("li", {"class": "main-sub-act-new"}):
        # Child that wraps the <a> tag linking to the news article.
        span_with_a_tag = li.contents[0]
        # Child that holds the publication time.
        span_with_date = li.contents[1]
        a_tag = span_with_a_tag.a
        date_string = span_with_date.string.strip()
        # Drop the first and last character of the date text (presumably
        # surrounding brackets -- confirm against the live page).
        news_items.append(
            NewsItem(date_string[1:-1], a_tag.string.strip(), a_tag["href"])
        )
    return news_items
51
+
52
def pretty_print(items):
    """Print the news entries to stdout, ordered by ``date`` descending.

    Args:
        items: Iterable of objects exposing ``date``, ``title`` and ``url``
            attributes. It is not modified.
    """
    # sorted() works on a copy, so printing does not reorder the caller's
    # list as a side effect, and any iterable is accepted.
    for item in sorted(items, key=lambda x: x.date, reverse=True):
        print("Date:", item.date)
        print("Title:", item.title)
        print("URL:", item.url)
        print()
59
+
60
if __name__ == "__main__":
    # Script entry point: download the listing page and, if the download
    # produced a body, parse it and print the entries.
    raw_html = get_raw_news(NEWS_ADDRESS)
    if raw_html is not None:
        pretty_print(parse_news(raw_html))
0 commit comments