Skip to content

Commit 39ab439

Browse files
committed
Move rewriting stuff from warc2zim to zimscraperlib
1 parent 8b6cbac commit 39ab439

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+10056
-78
lines changed

.github/workflows/Publish.yaml

+127-7
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,101 @@
1-
name: Build and upload to PyPI
1+
name: Build and publish to PyPI / NPM
22

33
on:
44
release:
55
types: [published]
66

77
jobs:
8-
publish:
9-
runs-on: ubuntu-22.04
8+
generate-rules:
9+
runs-on: ubuntu-24.04
10+
11+
steps:
12+
- name: Checkout repo
13+
uses: actions/checkout@v4
14+
15+
- name: Set up Python
16+
uses: actions/setup-python@v5
17+
with:
18+
python-version-file: pyproject.toml
19+
architecture: x64
20+
21+
- name: Install dependencies (and project)
22+
run: |
23+
pip install -U pip
24+
pip install -e .[scripts]
25+
26+
- name: Generate rules
27+
run: |
28+
python rules/generate_rules.py
29+
30+
- name: Save rules artifact
31+
uses: actions/upload-artifact@v4
32+
with:
33+
path: |
34+
src/zimscraperlib/rewriting/rules.py
35+
tests/rewriting/test_fuzzy_rules.py
36+
javascript/src/fuzzyRules.js
37+
javascript/test/fuzzyRules.js
38+
name: rules
39+
retention-days: 1
40+
41+
build-js:
42+
runs-on: ubuntu-24.04
43+
needs: generate-rules
44+
45+
steps:
46+
- name: Checkout repo
47+
uses: actions/checkout@v4
48+
49+
- name: Restore rules artifact
50+
uses: actions/download-artifact@v4
51+
with:
52+
name: rules
53+
54+
- name: Setup Node.JS
55+
uses: actions/setup-node@v4
56+
with:
57+
node-version-file: 'javascript/package.json'
58+
59+
- name: Install JS dependencies
60+
run: yarn install
61+
working-directory: javascript
62+
63+
- name: Build production JS
64+
run: yarn build-prod
65+
working-directory: javascript
66+
67+
- name: Save wombat-setup artifact
68+
uses: actions/upload-artifact@v4
69+
with:
70+
path: javascript/dist/wombatSetup.js
71+
name: wombat-setup
72+
retention-days: 1
73+
74+
publish-python:
75+
runs-on: ubuntu-24.04
76+
needs:
77+
- generate-rules # to have proper Python rules files (src and tests)
78+
- build-js # to have proper wombatSetup.js (needs to be included in sdist)
1079
permissions:
11-
id-token: write # mandatory for PyPI trusted publishing
80+
id-token: write # mandatory for PyPI trusted publishing
1281

1382
steps:
14-
- uses: actions/checkout@v3
83+
- name: Checkout repo
84+
uses: actions/checkout@v4
85+
86+
- name: Restore rules artifact
87+
uses: actions/download-artifact@v4
88+
with:
89+
name: rules
90+
91+
- name: Restore wombat-setup artifact
92+
uses: actions/download-artifact@v4
93+
with:
94+
name: wombat-setup
95+
path: src/zimscraperlib/rewriting/statics # destination directory: the artifact holds wombatSetup.js at its root
1596

1697
- name: Set up Python
17-
uses: actions/setup-python@v4
98+
uses: actions/setup-python@v5
1899
with:
19100
python-version-file: pyproject.toml
20101
architecture: x64
@@ -24,5 +105,44 @@ jobs:
24105
pip install -U pip build
25106
python -m build --sdist --wheel
26107
27-
- name: Upload to PyPI
108+
- name: Publish to PyPI
28109
uses: pypa/gh-action-pypi-publish@release/v1.8
110+
# OPTIONAL PUBLICATION TO NPM, NOT NEEDED BY SCRAPERS IN THE END
111+
112+
# publish-js:
113+
# runs-on: ubuntu-24.04
114+
# needs:
115+
# - generate-rules
116+
117+
# steps:
118+
# - name: Checkout repo
119+
# uses: actions/checkout@v4
120+
121+
# - name: Restore rules artifact
122+
# uses: actions/download-artifact@v4
123+
# with:
124+
# name: rules
125+
126+
# - name: Setup Node.JS
127+
# uses: actions/setup-node@v4
128+
# with:
129+
# node-version-file: 'javascript/package.json'
130+
# registry-url: 'https://registry.npmjs.org' # Setup .npmrc file to publish to npm
131+
132+
# - name: Install JS dependencies
133+
# run: yarn install
134+
# working-directory: javascript
135+
136+
# - name: Build production JS
137+
# run: yarn build-prod
138+
# working-directory: javascript
139+
140+
# - name: Build JS package
141+
# run: yarn pack
142+
# working-directory: javascript
143+
144+
# - name: Publish to NPM
145+
# run: npm publish $(ls *.tgz) --provenance --access public
146+
# working-directory: javascript
147+
# env:
148+
# NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

.github/workflows/PublishDev.yaml

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
name: Publish dev wombat-setup
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
8+
jobs:
9+
publish-dev-wombat-setup:
10+
runs-on: ubuntu-24.04
11+
12+
steps:
13+
- name: Checkout repo
14+
uses: actions/checkout@v4
15+
16+
- name: Set up Python
17+
uses: actions/setup-python@v5
18+
with:
19+
python-version-file: pyproject.toml
20+
architecture: x64
21+
22+
- name: Install dependencies (and project)
23+
run: |
24+
pip install -U pip
25+
pip install -e .[scripts]
26+
27+
- name: Generate rules
28+
run: |
29+
python rules/generate_rules.py
30+
31+
- name: Setup Node.JS
32+
uses: actions/setup-node@v4
33+
with:
34+
node-version-file: 'javascript/package.json'
35+
registry-url: 'https://registry.npmjs.org'
36+
37+
- name: Install JS dependencies
38+
run: yarn install
39+
working-directory: javascript
40+
41+
- name: Build production JS
42+
run: yarn build-prod
43+
working-directory: javascript
44+
45+
- name: Upload wombatSetup.js to dev drive
46+
run: |
47+
curl -f -u "${{ secrets.DEV_DRIVE_WEBDAV_CREDENTIALS }}" -T javascript/dist/wombatSetup.js -sw '%{http_code}' "https://dev.kiwix.org/zimscraperlib/"

.github/workflows/QA.yaml

+74-4
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,54 @@ on:
77
- main
88

99
jobs:
10-
check-qa:
11-
runs-on: ubuntu-22.04
10+
generate-rules:
11+
runs-on: ubuntu-24.04
1212

1313
steps:
14-
- uses: actions/checkout@v3
14+
- name: Checkout repo
15+
uses: actions/checkout@v4
1516

1617
- name: Set up Python
17-
uses: actions/setup-python@v4
18+
uses: actions/setup-python@v5
19+
with:
20+
python-version-file: pyproject.toml
21+
architecture: x64
22+
23+
- name: Install dependencies (and project)
24+
run: |
25+
pip install -U pip
26+
pip install -e .[scripts]
27+
28+
- name: Generate rules
29+
run: |
30+
python rules/generate_rules.py
31+
32+
- name: Save rules artifact
33+
uses: actions/upload-artifact@v4
34+
with:
35+
path: |
36+
src/zimscraperlib/rewriting/rules.py
37+
tests/rewriting/test_fuzzy_rules.py
38+
javascript/src/fuzzyRules.js
39+
javascript/test/fuzzyRules.js
40+
name: rules
41+
retention-days: 1
42+
43+
check-python-qa:
44+
runs-on: ubuntu-24.04
45+
needs: generate-rules
46+
47+
steps:
48+
- name: Checkout repo
49+
uses: actions/checkout@v4
50+
51+
- name: Restore rules artifact
52+
uses: actions/download-artifact@v4
53+
with:
54+
name: rules
55+
56+
- name: Set up Python
57+
uses: actions/setup-python@v5
1858
with:
1959
python-version-file: pyproject.toml
2060
architecture: x64
@@ -32,3 +72,33 @@ jobs:
3272

3373
- name: Check pyright
3474
run: inv check-pyright
75+
76+
check-javascript-qa:
77+
runs-on: ubuntu-24.04
78+
needs: generate-rules
79+
80+
steps:
81+
- name: Checkout repo
82+
uses: actions/checkout@v4
83+
84+
- name: Restore rules artifact
85+
uses: actions/download-artifact@v4
86+
with:
87+
name: rules
88+
89+
- name: Setup Node.JS
90+
uses: actions/setup-node@v4
91+
with:
92+
node-version-file: 'javascript/package.json'
93+
94+
- name: Install JS dependencies
95+
working-directory: javascript
96+
run: yarn install
97+
98+
- name: Check prettier formatting
99+
working-directory: javascript
100+
run: yarn prettier-check
101+
102+
- name: Check eslint rules
103+
working-directory: javascript
104+
run: yarn eslint

0 commit comments

Comments
 (0)