Skip to content

Commit 92657a0

Browse files
committed
refactor crawling leetcode with selenium and webdriver
1 parent 67d891b commit 92657a0

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

scripts/leetcode.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
5+
from selenium import webdriver
from selenium.webdriver.common.by import By
6+
7+
8+
class Leetcode(object):
    """Scrape one LeetCode problem page through a headless Chrome session.

    The instance owns a single Selenium Chrome driver created in
    ``__init__``. ``get_problem_all`` always shuts that driver down before
    returning, so an instance serves exactly one ``get_problem_all`` call.
    """

    # Suffix expected at the end of the page <title>; removed by get_title().
    _TITLE_SUFFIX = ' - LeetCode'

    def __init__(self):
        """Start a headless Chrome driver and store it on ``self.driver``."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        # ``options=`` supersedes the deprecated ``chrome_options=`` keyword.
        self.driver = webdriver.Chrome(options=chrome_options)

    def open_url(self, url):
        """Remember *url* on the instance and load it in the browser."""
        self.url = url
        print('open URL: {}'.format(url))
        self.driver.get(url)

    def teardown(self):
        """End the browser session.

        ``quit()`` is used instead of ``close()``: ``close()`` only closes
        the current window, whereas ``quit()`` also stops the driver process.
        """
        self.driver.quit()

    def get_title(self):
        """Return the page title without the trailing ``' - LeetCode'``.

        The suffix is removed only when it is actually present, so a title
        that lacks it is returned intact (whitespace-stripped) rather than
        losing its last characters.
        """
        print('get title...')
        title = self.driver.title
        if title.endswith(self._TITLE_SUFFIX):
            title = title[:-len(self._TITLE_SUFFIX)]
        return title.strip()

    def get_description(self):
        """Return the inner HTML of the ``question-description`` element."""
        print('get description...')
        elem = self.driver.find_element(By.CLASS_NAME, 'question-description')
        return elem.get_attribute('innerHTML')

    def get_difficulty(self):
        """Return the inner HTML of the ``difficulty-label`` element."""
        print('get difficulty...')
        elem = self.driver.find_element(By.CLASS_NAME, 'difficulty-label')
        return elem.get_attribute('innerHTML')

    def get_tags(self):
        """Return the inner HTML of every ``<a>`` inside ``#tags-topics``."""
        print('get tags...')
        container = self.driver.find_element(By.ID, 'tags-topics')
        links = container.find_elements(By.TAG_NAME, 'a')
        return [link.get_attribute('innerHTML') for link in links]

    def get_problem_all(self, url):
        """Fetch all details of the problem at *url*.

        Returns a dict with the keys ``'title'``, ``'difficulty'``,
        ``'tags'`` and ``'description'``. The driver is torn down before
        this method returns or raises, so a failed lookup does not leave a
        browser process behind.
        """
        print('get all the problem detail...')
        try:
            self.open_url(url)
            # Dict values are evaluated top to bottom, keeping the original
            # scrape order: title, difficulty, tags, description.
            return {
                'title': self.get_title(),
                'difficulty': self.get_difficulty(),
                'tags': self.get_tags(),
                'description': self.get_description(),
            }
        finally:
            self.teardown()
66+
67+
68+
if __name__ == '__main__':
    # Demo run: scrape one sample problem page and print the resulting dict.
    url = 'https://leetcode.com/problems/palindrome-number'
    leetcode = Leetcode()
    problem = leetcode.get_problem_all(url)
    print(problem)

0 commit comments

Comments
 (0)