-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnaverblogapi.py
78 lines (58 loc) · 2.06 KB
/
naverblogapi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from tqdm import tqdm
_NAVER_BLOG_SEARCH_URL = "https://openapi.naver.com/v1/search/blog"
_RESULTS_PER_PAGE = 100
_LAST_START_INDEX = 1000


def _search_naver_blogs(query, client_id, client_secret, timeout):
    """Query the Naver blog search API and collect Naver-hosted posts.

    Requests pages starting at result 1, 101, ..., 901 with 100 results each.

    Args:
        query: Raw search phrase; it is percent-encoded before being sent.
        client_id: Value for the ``X-Naver-Client-Id`` header.
        client_secret: Value for the ``X-Naver-Client-Secret`` header.
        timeout: Seconds to wait for each API response.

    Returns:
        A ``(titles, links)`` pair of equally long lists. Links have
        ``https://blog`` rewritten to ``https://m.blog``.

    Raises:
        urllib.error.URLError: If the API host cannot be reached at all.
    """
    encoded_query = urllib.parse.quote(query)
    titles = []
    links = []
    for start in range(1, _LAST_START_INDEX + 1, _RESULTS_PER_PAGE):
        # The endpoint answers with JSON.
        url = (
            f"{_NAVER_BLOG_SEARCH_URL}?query={encoded_query}"
            f"&display={_RESULTS_PER_PAGE}&start={start}"
        )
        request = urllib.request.Request(url)
        request.add_header("X-Naver-Client-Id", client_id)
        request.add_header("X-Naver-Client-Secret", client_secret)
        try:
            with urllib.request.urlopen(request, timeout=timeout) as response:
                status = response.getcode()
                if status != 200:
                    print("Error Code:" + str(status))
                    continue
                blogdata = json.loads(response.read())
        except urllib.error.HTTPError as err:
            # urlopen raises for non-2xx answers instead of returning them,
            # so the status has to be reported from the exception.
            print("Error Code:" + str(err.code))
            continue

        for post in blogdata.get('items', []):
            # Important: scrape the mobile host rather than the desktop one.
            # NOTE(review): presumably the mobile page exposes the post body
            # directly in its HTML - confirm.
            link = post['link'].replace('https://blog', 'https://m.blog')
            # Keep only posts hosted on Naver.
            if 'naver' in link:
                links.append(link)
                titles.append(post['title'])
    return titles, links


def _fetch_blog_text(url, timeout):
    """Download one post and return the text of its ``span.se-fs-`` elements.

    Returns an empty string when the page cannot be downloaded, so a single
    unreachable post does not abort the whole run.
    """
    try:
        page = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        print(f"Failed to fetch {url}: {err}")
        return ''
    soup = bs(page.text, 'html.parser')
    return ''.join(span.text for span in soup.find_all('span', class_='se-fs-'))


def naver_blog(sdata, output_path='팝콘.csv', client_id=None,
               client_secret=None, timeout=10):
    """Search Naver blogs for ``sdata``, scrape each post, and save a CSV.

    The CSV has the columns '제목' (title), '주소' (link) and '내용'
    (scraped text), one row per Naver-hosted search result.

    Args:
        sdata: Search phrase passed to the Naver blog search API.
        output_path: Destination of the CSV file.
        client_id: Naver API client id. When omitted, the
            ``NAVER_CLIENT_ID`` environment variable is used, falling back
            to the id that was previously embedded in this script.
        client_secret: Naver API client secret. When omitted, the
            ``NAVER_CLIENT_SECRET`` environment variable is used, falling
            back to the previously embedded secret.
        timeout: Seconds to wait for each HTTP request.

    Raises:
        urllib.error.URLError: If the search API host cannot be reached.
    """
    # NOTE(review): the fallback credentials are committed in source control;
    # they should be rotated and supplied via the environment instead.
    if client_id is None:
        client_id = os.environ.get("NAVER_CLIENT_ID", "4Gsl7IpgS8vEMmvfODda")
    if client_secret is None:
        client_secret = os.environ.get("NAVER_CLIENT_SECRET", "A3A_TqKzpW")

    titles, links = _search_naver_blogs(sdata, client_id, client_secret, timeout)
    df = pd.DataFrame({'제목': titles, '주소': links})

    blog_text_list = [_fetch_blog_text(link, timeout) for link in tqdm(df['주소'])]
    print('총블로그수:', len(blog_text_list))

    df['내용'] = blog_text_list
    df.to_csv(output_path)
# Script entry: run the blog search and scrape for this query on execution.
naver_blog(sdata='영화관+팝콘+포장')