forked from IQSS/dataverse
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat,ci: move check scripts to separate files
- Easier to run and debug as separate files
- Can be used locally, too
- Use GraalVM to compile a native binary for accent removal with the same Java code as used in the application (also uses JBang as the build system)
- "Just" using JBang is not fast enough: JVM startup times make it sluggish!
- Loading branch information
1 parent
07d67ac
commit f4b61bf
Showing
3 changed files
with
151 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,88 +1,34 @@ | ||
name: "Properties Check" | ||
on: | ||
pull_request: | ||
#paths: | ||
# - "**/*.properties" | ||
# - "scripts/api/data/metadatablocks/*" | ||
paths: | ||
- "src/**/*.properties" | ||
- "scripts/api/data/metadatablocks/*" | ||
jobs: | ||
duplicate_keys: | ||
name: Duplicate Keys | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- uses: actions/checkout@v4 | ||
- name: Run duplicates detection script | ||
shell: bash | ||
run: | | ||
FAIL=0 | ||
for PF in $(find . -wholename '*/src/*.properties'); do | ||
FILTER=$(grep -a -v -E "^(#.*|\s*$)" "$PF" | cut -d"=" -f1 | sort | uniq -c | tr -s " " | { grep -vs "^ 1 " || true; }) | ||
if [ -n "$FILTER" ]; then | ||
FAIL=1 | ||
echo "::group::$PF" | ||
for KEY in $(echo "$FILTER" | cut -d" " -f3); do | ||
for LINE in $(grep -n -E -e "^$KEY=" "$PF" | cut -d":" -f1); do | ||
echo "::error file=$PF,line=$LINE::Found duplicate for key '$KEY' in line $LINE" | ||
done | ||
done | ||
echo "::endgroup::" | ||
fi | ||
done | ||
if [ "$FAIL" -eq 1 ]; then | ||
exit 1 | ||
fi | ||
run: tests/check_duplicate_properties.sh | ||
|
||
metadata_blocks_properties: | ||
name: Metadata Blocks Properties | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Run metadata block properties verification script | ||
- uses: actions/checkout@v4 | ||
- name: Install JBang | ||
shell: bash | ||
run: | | ||
for MDB in $(find scripts/api/data/metadatablocks -name '*.tsv'); do | ||
BLOCK_NAME=$(sed -n "2p" "$MDB" | cut -f2) | ||
BLOCK_DISPLAYNAME=$(sed -n "2p" "$MDB" | cut -f4) | ||
PROPERTIES_FILE="src/main/java/propertyFiles/$BLOCK_NAME.properties" | ||
# Check correct file exists | ||
if [ ! -r "$PROPERTIES_FILE" ]; then | ||
echo "::error::Missing properties file for metadata block '$BLOCK_NAME', expected at '$PROPERTIES_FILE'" | ||
continue | ||
fi | ||
# Check metadata block properties exist and are equal to TSV source | ||
if ! grep -a -q -e "^metadatablock.name=$BLOCK_NAME$" "$PROPERTIES_FILE"; then | ||
echo "::error::Missing 'metadatablock.name=$BLOCK_NAME' or different from TSV source" | ||
fi | ||
if ! grep -a -q -e "^metadatablock.displayName=$BLOCK_DISPLAYNAME$" "$PROPERTIES_FILE"; then | ||
echo "::error::Missing 'metadatablock.displayName=$BLOCK_DISPLAYNAME' or different from TSV source" | ||
fi | ||
if ! grep -a -q -e "^metadatablock.displayFacet=" "$PROPERTIES_FILE"; then | ||
echo "::error::Missing 'metadatablock.displayFacet=...'" | ||
fi | ||
# Check dataset fields | ||
for FIELD in $(grep -a -A1000 "^#datasetField" "$MDB" | tail -n+2 | grep -a -B1000 "^#controlledVocabulary" | head -n-1 | cut -f2); do | ||
for ENTRY in title description watermark; do | ||
if ! grep -a -q -e "^datasetfieldtype.$FIELD.$ENTRY=" "$PROPERTIES_FILE"; then | ||
echo "::error::Missing key 'datasetfieldtype.$FIELD.$ENTRY=...'" | ||
fi | ||
done | ||
done | ||
# Check CV entries | ||
grep -a -A1000 "^#controlledVocabulary" "$MDB" | tail -n+2 | | ||
{ | ||
while read LINE; do | ||
FIELD_NAME=$(echo "$LINE" | cut -f1) | ||
# TODO: needs to replace UTF-8 chars with nearest ascii here! | ||
FIELD_VALUE=$(echo "$LINE" | cut -f2 | tr '[:upper:]' '[:lower:]' | tr " " "_") | ||
if ! grep -q -a -e "^controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=" "$PROPERTIES_FILE"; then | ||
echo "::error::Missing key 'controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=...'" | ||
fi | ||
done | ||
}; | ||
done | ||
curl -Ls https://sh.jbang.dev | bash -s - app setup | ||
- name: Install GraalVM + Native Image | ||
uses: graalvm/setup-graalvm@v1 | ||
with: | ||
github-token: ${{ secrets.GITHUB_TOKEN }} | ||
java-version: '21' | ||
distribution: 'graalvm-community' | ||
- name: Run metadata block properties verification script | ||
shell: bash | ||
run: tests/verify_mdb_properties.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/bin/bash

# Checks the Java *.properties files below any "src" directory for duplicate
# keys and prints GitHub workflow annotations pointing at the offending lines.
#
# Usage:       tests/check_duplicate_properties.sh
# Exit status: 0 if no duplicate keys were found, 1 otherwise.

set -euo pipefail

#######################################
# Report every key that is defined more than once in a properties file.
#
# A key is the text before the first "=" of a line, with surrounding blanks
# removed, so "key=a" and "key = b" count as the same key. Lines starting with
# "#", blank lines and lines without any "=" are ignored. Keys are compared
# literally (never as a regular expression), so "a.b" does not match "aXb".
#
# Arguments: $1 - path to the properties file
# Outputs:   one "::error file=...,line=..." annotation per offending line on
#            stdout; nothing at all if the file has no duplicate keys
#######################################
report_duplicates() {
    # LC_ALL=C: treat the file as plain bytes, whatever its encoding is.
    LC_ALL=C awk '
        /^#/ || /^[ \t]*$/ { next }
        {
            sep = index($0, "=")
            if (sep == 0) next

            key = substr($0, 1, sep - 1)
            gsub(/^[ \t]+|[ \t]+$/, "", key)

            if (key in lines) {
                lines[key] = lines[key] "," NR
            } else {
                # Remember first-seen order to get a stable, readable report
                order[++total] = key
                lines[key] = NR
            }
            count[key]++
        }
        END {
            for (i = 1; i <= total; i++) {
                key = order[i]
                if (count[key] < 2) continue
                n = split(lines[key], numbers, ",")
                # One annotation per line makes Github annotate each of them in the PR
                for (j = 1; j <= n; j++)
                    printf "::error file=%s,line=%s::Found duplicate for key \047%s\047 in lines %s\n", FILENAME, numbers[j], key, lines[key]
            }
        }
    ' "$1"
}

FAIL=0

# "git rev-parse --show-cdup" prints an EMPTY string when we already are in the
# top-level directory (which is where CI runs this script). find rejects an
# empty starting point, and as it runs inside a process substitution that
# failure would go unnoticed: no file would be scanned and the check would pass.
# So fall back to the current directory (also when not inside a Git repository).
ROOT=$(git rev-parse --show-cdup 2> /dev/null) || ROOT=""
ROOT=${ROOT:-.}

while IFS= read -r -d '' FILE; do

    # Scan the whole file for duplicates
    REPORT=$(report_duplicates "$FILE")

    # Only open a log group for files that actually contain duplicates
    if [ -n "$REPORT" ]; then
        FAIL=1

        echo "::group::$FILE"
        printf '%s\n' "$REPORT"
        echo "::endgroup::"
    fi
done < <(find "$ROOT" -type f -wholename "*/src/*.properties" -print0)

if [ "$FAIL" -eq 1 ]; then
    exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#!/bin/bash

# This script will check our metadata block files and scan if the properties files contain all the matching keys.
#
# Requirements: jbang and GraalVM native-image on the PATH; must be run from within the Git working tree.
# Exit status:  0 if every expected key was found, 1 otherwise (or if a requirement is missing).

set -euo pipefail

if ! command -v jbang > /dev/null 2>&1; then
    echo "Cannot find jbang on path. Did you install it?" >&2
    exit 1
fi
if ! command -v native-image > /dev/null 2>&1; then
    echo "Cannot find GraalVM native-image on path. Did you install it?" >&2
    exit 1
fi

FAIL=0

# We need a small Java app to replace UTF-8 chars with nearest ascii / strip accents because of
# https://github.com/IQSS/dataverse/blob/dddcf29188a5c35174f3c94ffc1c4cb1d7fc0552/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java#L140
# This cannot be replaced by another tool, as it behaves rather individually.
DIR=$(mktemp -d)
SOURCE="$DIR/stripaccents.java"
STRIP_BIN="$(dirname "$0")/stripaccents"

# Remove the temporary directory and the compiled binary on ANY exit path,
# including a failed build or an aborted check.
cleanup() {
    rm -rf -- "$DIR"
    rm -f -- "$STRIP_BIN"
}
trap cleanup EXIT

# Quoted delimiter: the Java source is written out literally, nothing is expanded by the shell.
cat > "$SOURCE" << 'EOF'
///usr/bin/env jbang "$0" "$@" ; exit $?
//JAVA 11+
//DEPS org.apache.commons:commons-lang3:3.12.0
import org.apache.commons.lang3.StringUtils;
import java.nio.charset.StandardCharsets;
import java.io.IOException;
class stripaccents {
    public static void main(String[] args) throws IOException {
        String input = new String(System.in.readAllBytes(), StandardCharsets.UTF_8);
        System.out.println(StringUtils.stripAccents(input));
    }
}
EOF
jbang export native --force --fresh -O "$STRIP_BIN" "$SOURCE"

#######################################
# Print the data rows of one section of a metadata block TSV file.
# Arguments: $1 - path to the TSV file
#            $2 - literal prefix of the section header line, e.g. "#datasetField"
# Outputs:   all rows after the header line up to the next line starting with
#            "#" (or the end of the file); blank rows are skipped
#######################################
tsv_section() {
    SECTION="$2" LC_ALL=C awk '
        index($0, ENVIRON["SECTION"]) == 1 { inside = 1; next }
        /^#/ { inside = 0 }
        inside && !/^[ \t]*$/ { print }
    ' "$1"
}

#######################################
# Check if a file contains a line that is exactly equal to a literal string.
# Arguments: $1 - file, $2 - expected line (not interpreted as a regex)
# Returns:   0 if such a line exists, non-zero otherwise
#######################################
has_line() {
    grep -a -q -x -F -e "$2" "$1"
}

#######################################
# Check if a file contains a line that starts with a literal string.
# Arguments: $1 - file, $2 - expected prefix (not interpreted as a regex)
# Returns:   0 if such a line exists, 1 otherwise
#######################################
has_prefix() {
    NEEDLE="$2" LC_ALL=C awk '
        index($0, ENVIRON["NEEDLE"]) == 1 { found = 1; exit }
        END { exit !found }
    ' "$1"
}

# Relative path to the top-level directory; empty if we already are there, so it is only used as a prefix.
REPO_ROOT=$(git rev-parse --show-cdup)

while IFS= read -r -d '' MDB; do

    echo "::group::$MDB"
    # The second line of the TSV carries the block's name (column 2) and display name (column 4)
    BLOCK_NAME=$(sed -n "2p" "$MDB" | cut -f2)
    BLOCK_DISPLAY_NAME=$(sed -n "2p" "$MDB" | cut -f4)
    PROPERTIES_FILE="${REPO_ROOT}src/main/java/propertyFiles/$BLOCK_NAME.properties"

    # Check correct file exists
    if [ ! -r "$PROPERTIES_FILE" ]; then
        echo "::error::Missing properties file for metadata block '$BLOCK_NAME', expected at '$PROPERTIES_FILE'"
        FAIL=1
        # Close the log group before skipping to the next block to keep groups balanced
        echo "::endgroup::"
        continue
    fi

    # Check metadata block properties exist and are equal to TSV source
    if ! has_line "$PROPERTIES_FILE" "metadatablock.name=$BLOCK_NAME"; then
        echo "::error::Missing 'metadatablock.name=$BLOCK_NAME' or different from TSV source in $PROPERTIES_FILE"
        FAIL=1
    fi
    if ! has_line "$PROPERTIES_FILE" "metadatablock.displayName=$BLOCK_DISPLAY_NAME"; then
        echo "::error::Missing 'metadatablock.displayName=$BLOCK_DISPLAY_NAME' or different from TSV source in $PROPERTIES_FILE"
        FAIL=1
    fi
    if ! has_prefix "$PROPERTIES_FILE" "metadatablock.displayFacet="; then
        echo "::error::Missing 'metadatablock.displayFacet=...' in $PROPERTIES_FILE"
        FAIL=1
    fi

    # Check dataset fields (the field name is in column 2)
    while IFS= read -r FIELD; do
        if [ -z "$FIELD" ]; then
            continue
        fi
        for ENTRY in title description watermark; do
            if ! has_prefix "$PROPERTIES_FILE" "datasetfieldtype.$FIELD.$ENTRY="; then
                echo "::error::Missing key 'datasetfieldtype.$FIELD.$ENTRY=...' in $PROPERTIES_FILE"
                FAIL=1
            fi
        done
    done < <(tsv_section "$MDB" "#datasetField" | cut -f2)

    # Check CV entries. Data rows start with an empty first column (just like the rows read above),
    # so the field name is in column 2 and the value in column 3.
    while IFS= read -r LINE; do
        FIELD_NAME=$(printf '%s\n' "$LINE" | cut -f2)
        FIELD_VALUE=$(printf '%s\n' "$LINE" | cut -f3 | tr '[:upper:]' '[:lower:]' | tr " " "_" | "$STRIP_BIN")

        if ! has_prefix "$PROPERTIES_FILE" "controlledvocabulary.$FIELD_NAME.$FIELD_VALUE="; then
            echo "::error::Missing key 'controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=...' in $PROPERTIES_FILE"
            FAIL=1
        fi
    done < <(tsv_section "$MDB" "#controlledVocabulary")

    echo "::endgroup::"

done < <(find "${REPO_ROOT}scripts/api/data/metadatablocks" -name '*.tsv' -print0)

if [ "$FAIL" -eq 1 ]; then
    exit 1
fi