Skip to content

Commit 7d71bf3

Browse files
authored
Add GH Action to create tranco_top_10k.csv and tranco_top_50k.csv (#361)
1 parent 070c773 commit 7d71bf3

File tree

3 files changed

+50123
-0
lines changed

3 files changed

+50123
-0
lines changed

.github/workflows/process-tranco.yml

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
name: Process Tranco CSV
2+
3+
on:
4+
push:
5+
paths:
6+
- 'tranco.csv'
7+
pull_request:
8+
paths:
9+
- 'tranco.csv'
10+
workflow_dispatch: {} # Simple manual trigger with no inputs
11+
12+
jobs:
13+
process-tranco:
14+
runs-on: ubuntu-latest
15+
permissions:
16+
contents: write
17+
18+
steps:
19+
- name: Checkout repository
20+
uses: actions/checkout@v3
21+
with:
22+
fetch-depth: 1 # Only fetch the latest commit to speed up checkout
23+
24+
- name: Set configuration
25+
id: config
26+
run: |
# Publish the output configuration to the later steps of this job.
# CONFIG is a JSON array; each entry names an output file ("filename") and the
# number of leading tranco.csv rows it should contain ("count").
CONFIG='[{"count": 10000, "filename": "tranco_top_10k.csv"}, {"count": 50000, "filename": "tranco_top_50k.csv"}]'
# Lines appended to the file named by $GITHUB_ENV become environment variables
# for subsequent steps. The path is quoted so it is never word-split, and
# printf is used so the JSON is written verbatim.
printf 'CONFIG=%s\n' "$CONFIG" >> "$GITHUB_ENV"
echo "Using configuration: $CONFIG"
31+
32+
- name: Validate manifest.json
33+
id: validate
34+
run: |
# Fail the job unless every output file named in $CONFIG is registered in
# manifest.json under a "file" key.
if [[ ! -f manifest.json ]]; then
  echo "Error: manifest.json file not found"
  exit 1
fi

# Extract the filenames up front so a malformed $CONFIG aborts the step
# instead of silently validating nothing.
filenames=$(jq -r '.[].filename' <<<"$CONFIG") || exit 1

validation_failed=false

# The loop reads from a here-string rather than a pipe, so it runs in the
# current shell and the flag set inside it is still visible afterwards.
while IFS= read -r filename; do
  [[ -n "$filename" ]] || continue

  # Compare against the parsed JSON value of any "file" key, at any depth.
  # Unlike a textual grep this does not depend on how manifest.json is
  # formatted and does not treat '.' in the filename as a wildcard.
  if jq -e --arg f "$filename" 'any(.. | objects; .file == $f)' manifest.json >/dev/null; then
    echo "✓ $filename is defined in manifest.json"
  else
    echo "Error: $filename is not defined in manifest.json"
    validation_failed=true
  fi
done <<<"$filenames"

# Report every missing file first, then fail once.
if [[ "$validation_failed" == true ]]; then
  echo "One or more output files are not defined in manifest.json. Please update manifest.json first."
  exit 1
fi
65+
66+
- name: Process Tranco CSV
67+
id: process
68+
run: |
# For each entry in $CONFIG, write the first "count" lines of tranco.csv to
# the entry's "filename".
echo "Processing Tranco CSV with configuration: $CONFIG"

if [[ ! -f tranco.csv ]]; then
  echo "Error: tranco.csv file not found"
  exit 1
fi

# Iterate over the config entries as compact one-line JSON objects. Process
# substitution keeps the loop in the current shell.
while IFS= read -r entry; do
  # '// empty' turns a missing or null key into an empty string; plain
  # 'jq -r' would print the literal text "null", which a -z test never catches.
  count=$(jq -r '.count // empty' <<<"$entry")
  filename=$(jq -r '.filename // empty' <<<"$entry")

  # count must be a plain non-negative integer for head -n.
  if [[ ! "$count" =~ ^[0-9]+$ || -z "$filename" ]]; then
    echo "Skipping invalid configuration: $entry"
    continue
  fi

  # head yields at most $count lines; a shorter tranco.csv produces a
  # shorter output file.
  head -n "$count" -- tranco.csv > "$filename"

  lines=$(wc -l < "$filename")
  echo "Successfully created $filename with $lines rows"
done < <(jq -c '.[]' <<<"$CONFIG")
93+
94+
- name: Configure Git
95+
run: |
96+
# Set the author identity used for the automated commit. --local stores the
# values in this checkout's repository config rather than the user-wide one.
# NOTE(review): the address below reads "[email protected]", which looks like a
# placeholder substituted by the page capture rather than the real value;
# confirm against the repository before reusing this step.
git config --local user.email "[email protected]"
97+
git config --local user.name "Tranco Process Bot"
98+
99+
- name: Commit and push changes
100+
run: |
# Stage the generated output files, then commit and push them if anything
# changed.

# Stage each file named in $CONFIG individually so nothing else in the
# working tree is picked up. '--' keeps a filename from being read as an option.
while IFS= read -r filename; do
  [[ -n "$filename" ]] || continue
  git add -- "$filename"
done < <(jq -r '.[].filename' <<<"$CONFIG")

if git diff --staged --quiet; then
  # Exit status 0 from 'git diff --quiet' means the index matches HEAD.
  echo "No changes detected in the output files. Nothing to commit."
elif ! git symbolic-ref -q HEAD >/dev/null; then
  # This workflow also runs on pull_request, where the checkout is a detached
  # HEAD. 'git push' has no branch to update there and would fail the job.
  echo "Changes detected, but no branch is checked out (detached HEAD). Skipping commit and push."
else
  echo "Changes detected. Committing and pushing..."
  git commit -m "Update Tranco Top Domain [automated]" && git push
fi

manifest.json

+9
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,15 @@
160160
"entry"
161161
]
162162
},
163+
{
164+
"file": "tranco_top_50k.csv",
165+
"identifier": "tranco_50k",
166+
"display_name": "Tranco Top 50,000 Domains",
167+
"headers": [
168+
"x",
169+
"entry"
170+
]
171+
},
163172
{
164173
"file": "umbrella_top_1m.csv",
165174
"identifier": "umbrella_1m",

0 commit comments

Comments
 (0)