Skip to content

Commit 096184e

Browse files
committed
init
0 parents  commit 096184e

16 files changed

+694
-0
lines changed

.gitignore

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
env.sh
2+
# Created by .ignore support plugin (hsz.mobi)
3+
### Python template
4+
# Byte-compiled / optimized / DLL files
5+
__pycache__/
6+
*.py[cod]
7+
*$py.class
8+
9+
# C extensions
10+
*.so
11+
12+
# Distribution / packaging
13+
.Python
14+
build/
15+
develop-eggs/
16+
dist/
17+
downloads/
18+
eggs/
19+
.eggs/
20+
lib/
21+
lib64/
22+
parts/
23+
sdist/
24+
var/
25+
wheels/
26+
pip-wheel-metadata/
27+
share/python-wheels/
28+
*.egg-info/
29+
.installed.cfg
30+
*.egg
31+
MANIFEST
32+
33+
# PyInstaller
34+
# Usually these files are written by a python script from a template
35+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
36+
*.manifest
37+
*.spec
38+
39+
# Installer logs
40+
pip-log.txt
41+
pip-delete-this-directory.txt
42+
43+
# Unit test / coverage reports
44+
htmlcov/
45+
.tox/
46+
.nox/
47+
.coverage
48+
.coverage.*
49+
.cache
50+
nosetests.xml
51+
coverage.xml
52+
*.cover
53+
*.py,cover
54+
.hypothesis/
55+
.pytest_cache/
56+
57+
# Translations
58+
*.mo
59+
*.pot
60+
61+
# Django stuff:
62+
*.log
63+
local_settings.py
64+
db.sqlite3
65+
db.sqlite3-journal
66+
67+
# Flask stuff:
68+
instance/
69+
.webassets-cache
70+
71+
# Scrapy stuff:
72+
.scrapy
73+
74+
# Sphinx documentation
75+
docs/_build/
76+
77+
# PyBuilder
78+
target/
79+
80+
# Jupyter Notebook
81+
.ipynb_checkpoints
82+
83+
# IPython
84+
profile_default/
85+
ipython_config.py
86+
87+
# pyenv
88+
.python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
98+
__pypackages__/
99+
100+
# Celery stuff
101+
celerybeat-schedule
102+
celerybeat.pid
103+
104+
# SageMath parsed files
105+
*.sage.py
106+
107+
# Environments
108+
.env
109+
.venv
110+
env/
111+
venv/
112+
ENV/
113+
env.bak/
114+
venv.bak/
115+
116+
# Spyder project settings
117+
.spyderproject
118+
.spyproject
119+
120+
# Rope project settings
121+
.ropeproject
122+
123+
# mkdocs documentation
124+
/site
125+
126+
# mypy
127+
.mypy_cache/
128+
.dmypy.json
129+
dmypy.json
130+
131+
# Pyre type checker
132+
.pyre/

Dockerfile

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
# Image for the movie crawler: installs the Python dependencies, fetches the
# backend sources via run.sh, then runs run.py from /app/spider.
FROM python:3.6
WORKDIR /app

# Copy requirements.txt on its own so the dependency layer stays cached until
# the pin list changes. --no-cache-dir keeps pip's download cache out of the layer.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# run.sh clones the Scrape repository and copies its backend into ./backend.
COPY run.sh .
RUN sh run.sh

# COPY instead of ADD: only a plain copy of the build context is needed, and
# ADD's extra behaviors (archive auto-extraction, URL fetch) are not wanted.
COPY . .

WORKDIR /app/spider

# Exec form: python3 runs as PID 1 and receives stop signals directly instead
# of being wrapped in `/bin/sh -c`.
CMD ["python3", "run.py"]

deployment.yml

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
# Deployment for the movie crawler (generated by kompose, then hand-edited).
# apps/v1 replaces extensions/v1beta1, which no longer serves Deployment as of
# Kubernetes 1.16. apps/v1 requires an explicit spec.selector.
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    kompose.cmd: kompose convert -f docker-compose.yml -o deployment.yml
    kompose.version: 1.20.0 ()
  creationTimestamp: null
  labels:
    io.kompose.service: crawler-movie
  name: crawler-movie
  namespace: crawler
spec:
  replicas: 1
  revisionHistoryLimit: 1
  # Must match the pod template labels below.
  selector:
    matchLabels:
      io.kompose.service: crawler-movie
  strategy: {}
  template:
    metadata:
      annotations:
        kompose.cmd: kompose convert -f docker-compose.yml -o deployment.yml
        kompose.version: 1.20.0 ()
      creationTimestamp: null
      labels:
        io.kompose.service: crawler-movie
    spec:
      containers:
        - args:
            - python3
            - run.py
          env:
            # NOTE(review): the database is named "scrape_book" although this is
            # the movie crawler — confirm this is the intended database.
            - name: PGSQL_DATABASE
              value: scrape_book
            # Connection settings are read from the "pgsql" Secret.
            - name: PGSQL_HOST
              valueFrom:
                secretKeyRef:
                  name: pgsql
                  key: host
            - name: PGSQL_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: pgsql
                  key: password
            - name: PGSQL_PORT
              valueFrom:
                secretKeyRef:
                  name: pgsql
                  key: port
            - name: PGSQL_USER
              valueFrom:
                secretKeyRef:
                  name: pgsql
                  key: user
          image: germey/crawler-movie
          name: crawler-movie
          resources:
            limits:
              memory: "200Mi"
              cpu: "150m"
            requests:
              memory: "200Mi"
              cpu: "150m"
      restartPolicy: Always
status: {}

docker-compose.yml

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# Local build/run definition for the movie crawler.
version: '3'
services:
  crawler-movie:
    container_name: 'crawler-movie'
    restart: always
    build: .
    image: 'germey/crawler-movie'
    command: 'python3 run.py'
    environment:
      # Keys without a value are passed through from the host environment;
      # a variable that is unset on the host stays unset in the container.
      # PGSQL_DATABASE is included so the same settings the Kubernetes
      # deployment provides can also be supplied locally.
      PGSQL_DATABASE:
      PGSQL_HOST:
      PGSQL_PORT:
      PGSQL_USER:
      PGSQL_PASSWORD:

requirements.txt

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
scrapy==1.6.0
2+
Django==2.2.9
3+
django-cors-headers==3.2.0
4+
djangorestframework==3.11.0
5+
requests==2.22.0
6+
urllib3==1.25.7
7+
uwsgi==2.0.18
8+
psycopg2-binary==2.8.4
9+
scrapy_djangoitem==1.1.1
10+
dateparser==0.7.2
11+
merry==0.2.2
12+
environs==7.2.0
13+
furl==2.0.0

run.sh

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
#!/bin/sh
# Fetch the backend sources from the Scrape repository into ./backend.
#
# Abort on the first failing command. Without this the script's exit status is
# that of the final `rm -rf`, so a failed clone or copy would go unnoticed by
# the caller (e.g. a Docker `RUN sh run.sh` step).
set -e

# Only the current tree is copied, so a shallow clone is sufficient.
git clone --depth 1 https://github.com/Germey/Scrape.git
cp -r Scrape/src/dynamic1/backend ./backend
rm -rf Scrape

spider/.gitignore

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# Created by .ignore support plugin (hsz.mobi)
2+
### Python template
3+
# Byte-compiled / optimized / DLL files
4+
__pycache__/
5+
*.py[cod]
6+
*$py.class
7+
8+
# C extensions
9+
*.so
10+
11+
# Distribution / packaging
12+
.Python
13+
build/
14+
develop-eggs/
15+
dist/
16+
downloads/
17+
eggs/
18+
.eggs/
19+
lib/
20+
lib64/
21+
parts/
22+
sdist/
23+
var/
24+
wheels/
25+
pip-wheel-metadata/
26+
share/python-wheels/
27+
*.egg-info/
28+
.installed.cfg
29+
*.egg
30+
MANIFEST
31+
32+
# PyInstaller
33+
# Usually these files are written by a python script from a template
34+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
35+
*.manifest
36+
*.spec
37+
38+
# Installer logs
39+
pip-log.txt
40+
pip-delete-this-directory.txt
41+
42+
# Unit test / coverage reports
43+
htmlcov/
44+
.tox/
45+
.nox/
46+
.coverage
47+
.coverage.*
48+
.cache
49+
nosetests.xml
50+
coverage.xml
51+
*.cover
52+
*.py,cover
53+
.hypothesis/
54+
.pytest_cache/
55+
56+
# Translations
57+
*.mo
58+
*.pot
59+
60+
# Django stuff:
61+
*.log
62+
local_settings.py
63+
db.sqlite3
64+
db.sqlite3-journal
65+
66+
# Flask stuff:
67+
instance/
68+
.webassets-cache
69+
70+
# Scrapy stuff:
71+
.scrapy
72+
73+
# Sphinx documentation
74+
docs/_build/
75+
76+
# PyBuilder
77+
target/
78+
79+
# Jupyter Notebook
80+
.ipynb_checkpoints
81+
82+
# IPython
83+
profile_default/
84+
ipython_config.py
85+
86+
# pyenv
87+
.python-version
88+
89+
# pipenv
90+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
92+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
93+
# install all needed dependencies.
94+
#Pipfile.lock
95+
96+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
97+
__pypackages__/
98+
99+
# Celery stuff
100+
celerybeat-schedule
101+
celerybeat.pid
102+
103+
# SageMath parsed files
104+
*.sage.py
105+
106+
# Environments
107+
.env
108+
.venv
109+
env/
110+
venv/
111+
ENV/
112+
env.bak/
113+
venv.bak/
114+
115+
# Spyder project settings
116+
.spyderproject
117+
.spyproject
118+
119+
# Rope project settings
120+
.ropeproject
121+
122+
# mkdocs documentation
123+
/site
124+
125+
# mypy
126+
.mypy_cache/
127+
.dmypy.json
128+
dmypy.json
129+
130+
# Pyre type checker
131+
.pyre/
132+

spider/movie/__init__.py

Whitespace-only changes.

spider/movie/items.py

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Define here the models for your scraped items
4+
#
5+
# See documentation in:
6+
# https://doc.scrapy.org/en/latest/topics/items.html
7+
from scrapy_djangoitem import DjangoItem
8+
from app.models import Movie
9+
10+
11+
class MovieItem(DjangoItem):
    """Scrapy item bound to the Django ``Movie`` model through ``django_model``."""

    django_model = Movie

0 commit comments

Comments
 (0)