diff --git a/.github/workflows/check_property_files.yml b/.github/workflows/check_property_files.yml
index bb51f810cb1..dce5490d071 100644
--- a/.github/workflows/check_property_files.yml
+++ b/.github/workflows/check_property_files.yml
@@ -1,88 +1,36 @@
 name: "Properties Check"
 on:
   pull_request:
-    #paths:
-    #  - "**/*.properties"
-    #  - "scripts/api/data/metadatablocks/*"
+    paths:
+      - "src/**/*.properties"
+      - "scripts/api/data/metadatablocks/*"
 jobs:
   duplicate_keys:
     name: Duplicate Keys
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Run duplicates detection script
         shell: bash
-        run: |
-          FAIL=0
-
-          for PF in $(find . -wholename '*/src/*.properties'); do
-            FILTER=$(grep -a -v -E "^(#.*|\s*$)" "$PF" | cut -d"=" -f1 | sort | uniq -c | tr -s " " | { grep -vs "^ 1 " || true; })
-            if [ -n "$FILTER" ]; then
-              FAIL=1
-
-              echo "::group::$PF"
-              for KEY in $(echo "$FILTER" | cut -d" " -f3); do
-                for LINE in $(grep -n -E -e "^$KEY=" "$PF" | cut -d":" -f1); do
-                  echo "::error file=$PF,line=$LINE::Found duplicate for key '$KEY' in line $LINE"
-                done
-              done
-              echo "::endgroup::"
-            fi
-          done
-
-          if [ "$FAIL" -eq 1 ]; then
-            exit 1
-          fi
+        run: tests/check_duplicate_properties.sh
 
   metadata_blocks_properties:
     name: Metadata Blocks Properties
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - name: Run metadata block properties verification script
+      - uses: actions/checkout@v4
+      - name: Install JBang
         shell: bash
         run: |
-          for MDB in $(find scripts/api/data/metadatablocks -name '*.tsv'); do
-            BLOCK_NAME=$(sed -n "2p" "$MDB" | cut -f2)
-            BLOCK_DISPLAYNAME=$(sed -n "2p" "$MDB" | cut -f4)
-            PROPERTIES_FILE="src/main/java/propertyFiles/$BLOCK_NAME.properties"
-
-            # Check correct file exists
-            if [ ! -r "$PROPERTIES_FILE" ]; then
-              echo "::error::Missing properties file for metadata block '$BLOCK_NAME', expected at '$PROPERTIES_FILE'"
-              continue
-            fi
-
-            # Check metadata block properties exist and are equal to TSV source
-            if ! grep -a -q -e "^metadatablock.name=$BLOCK_NAME$" "$PROPERTIES_FILE"; then
-              echo "::error::Missing 'metadatablock.name=$BLOCK_NAME' or different from TSV source"
-            fi
-            if ! grep -a -q -e "^metadatablock.displayName=$BLOCK_DISPLAYNAME$" "$PROPERTIES_FILE"; then
-              echo "::error::Missing 'metadatablock.displayName=$BLOCK_DISPLAYNAME' or different from TSV source"
-            fi
-            if ! grep -a -q -e "^metadatablock.displayFacet=" "$PROPERTIES_FILE"; then
-              echo "::error::Missing 'metadatablock.displayFacet=...'"
-            fi
-
-            # Check dataset fields
-            for FIELD in $(grep -a -A1000 "^#datasetField" "$MDB" | tail -n+2 | grep -a -B1000 "^#controlledVocabulary" | head -n-1 | cut -f2); do
-              for ENTRY in title description watermark; do
-                if ! grep -a -q -e "^datasetfieldtype.$FIELD.$ENTRY=" "$PROPERTIES_FILE"; then
-                  echo "::error::Missing key 'datasetfieldtype.$FIELD.$ENTRY=...'"
-                fi
-              done
-            done
-
-            # Check CV entries
-            grep -a -A1000 "^#controlledVocabulary" "$MDB" | tail -n+2 |
-            {
-              while read LINE; do
-                FIELD_NAME=$(echo "$LINE" | cut -f1)
-                # TODO: needs to replace UTF-8 chars with nearest ascii here!
-                FIELD_VALUE=$(echo "$LINE" | cut -f2 | tr '[:upper:]' '[:lower:]' | tr " " "_")
-                if ! grep -q -a -e "^controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=" "$PROPERTIES_FILE"; then
-                  echo "::error::Missing key 'controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=...'"
-                fi
-              done
-            };
-          done
\ No newline at end of file
+          curl -Ls https://sh.jbang.dev | bash -s - app setup
+          # "app setup" only edits shell rc files, which later non-login steps never source
+          echo "$HOME/.jbang/bin" >> "$GITHUB_PATH"
+      - name: Install GraalVM + Native Image
+        uses: graalvm/setup-graalvm@v1
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          java-version: '21'
+          distribution: 'graalvm-community'
+      - name: Run metadata block properties verification script
+        shell: bash
+        run: tests/verify_mdb_properties.sh
diff --git a/tests/check_duplicate_properties.sh b/tests/check_duplicate_properties.sh
new file mode 100755
index 00000000000..c536b7600eb
--- /dev/null
+++ b/tests/check_duplicate_properties.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# This script will check Java *.properties files within the src dir for duplicates
+# and print logs with file annotations about it.
+
+set -euo pipefail
+
+FAIL=0
+
+# "git rev-parse --show-cdup" prints an empty string when invoked from the repository root.
+# find rejects an empty start path and, hidden inside the process substitution below, the
+# check would then silently pass without scanning anything - so fall back to the current dir.
+REPO_ROOT="$(git rev-parse --show-cdup)"
+REPO_ROOT="${REPO_ROOT:-./}"
+
+while IFS= read -r -d '' FILE; do
+
+  # Scan the whole file for duplicates
+  FILTER=$(grep -a -v -E "^(#.*|\s*$)" "$FILE" | cut -d"=" -f1 | sort | uniq -c | tr -s " " | { grep -vs "^ 1 " || true; })
+
+  # If there are any duplicates present, analyse further to point people to the source
+  if [ -n "$FILTER" ]; then
+    FAIL=1
+
+    echo "::group::$FILE"
+    for KEY in $(echo "$FILTER" | cut -d" " -f3); do
+      # Find duplicate lines' numbers by grepping for the KEY and cutting the number from the output
+      DUPLICATE_LINES=$(grep -n -E -e "^$KEY=" "$FILE" | cut -d":" -f1)
+      # Join the found line numbers for better error log
+      DUPLICATE_NUMBERS=$(echo "$DUPLICATE_LINES" | paste -sd ',')
+
+      # This form will make Github annotate the lines in the PR that changes the properties file
+      for LINE_NUMBER in $DUPLICATE_LINES; do
+        echo "::error file=$FILE,line=$LINE_NUMBER::Found duplicate for key '$KEY' in lines $DUPLICATE_NUMBERS"
+      done
+    done
+    echo "::endgroup::"
+  fi
+done < <( find "$REPO_ROOT" -wholename "*/src/*.properties" -print0 )
+
+if [ "$FAIL" -eq 1 ]; then
+  exit 1
+fi
diff --git a/tests/verify_mdb_properties.sh b/tests/verify_mdb_properties.sh
new file mode 100755
index 00000000000..3378322e4bf
--- /dev/null
+++ b/tests/verify_mdb_properties.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+# This script will check our metadata block files and scan if the properties files contain all the matching keys.
+
+set -euo pipefail
+
+if ! command -v jbang > /dev/null 2>&1; then
+  echo "Cannot find jbang on path. Did you install it?" >&2
+  exit 1
+fi
+if ! command -v native-image > /dev/null 2>&1; then
+  echo "Cannot find GraalVM native-image on path. Did you install it?" >&2
+  exit 1
+fi
+
+FAIL=0
+
+# We need a small Java app to replace UTF-8 chars with nearest ascii / strip accents because of
+# https://github.com/IQSS/dataverse/blob/dddcf29188a5c35174f3c94ffc1c4cb1d7fc0552/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java#L140
+# This cannot be replaced by another tool, as it behaves rather individually.
+DIR=$(mktemp -d)
+SOURCE="$DIR/stripaccents.java"
+STRIP_BIN="$(dirname "$0")/stripaccents"
+
+# Remove the temporary build directory and the compiled helper on every exit path,
+# including early aborts triggered by "set -e".
+trap 'rm -rf -- "$DIR"; rm -f -- "$STRIP_BIN"' EXIT
+
+cat > "$SOURCE" << EOF
+///usr/bin/env jbang "\$0" "\$@" ; exit \$?
+//JAVA 11+
+//DEPS org.apache.commons:commons-lang3:3.12.0
+import org.apache.commons.lang3.StringUtils;
+import java.nio.charset.StandardCharsets;
+import java.io.IOException;
+class stripaccents {
+    public static void main(String[] args) throws IOException {
+        String input = new String(System.in.readAllBytes(), StandardCharsets.UTF_8);
+        System.out.println(StringUtils.stripAccents(input));
+    }
+}
+EOF
+jbang export native --force --fresh -O "$STRIP_BIN" "$SOURCE"
+
+while IFS= read -r -d '' MDB; do
+
+  echo "::group::$MDB"
+  BLOCK_NAME=$(sed -n "2p" "$MDB" | cut -f2)
+  BLOCK_DISPLAY_NAME=$(sed -n "2p" "$MDB" | cut -f4)
+  PROPERTIES_FILE="$(git rev-parse --show-cdup)src/main/java/propertyFiles/$BLOCK_NAME.properties"
+
+  # Check correct file exists
+  if [ ! -r "$PROPERTIES_FILE" ]; then
+    echo "::error::Missing properties file for metadata block '$BLOCK_NAME', expected at '$PROPERTIES_FILE'"
+    # Close this block's log group before skipping, so it does not swallow the next block's output
+    echo "::endgroup::"
+    FAIL=1
+    continue
+  fi
+
+  # Check metadata block properties exist and are equal to TSV source
+  if ! grep -a -q -e "^metadatablock.name=$BLOCK_NAME$" "$PROPERTIES_FILE"; then
+    echo "::error::Missing 'metadatablock.name=$BLOCK_NAME' or different from TSV source in $PROPERTIES_FILE"
+    FAIL=1
+  fi
+  if ! grep -a -q -e "^metadatablock.displayName=$BLOCK_DISPLAY_NAME$" "$PROPERTIES_FILE"; then
+    echo "::error::Missing 'metadatablock.displayName=$BLOCK_DISPLAY_NAME' or different from TSV source in $PROPERTIES_FILE"
+    FAIL=1
+  fi
+  if ! grep -a -q -e "^metadatablock.displayFacet=" "$PROPERTIES_FILE"; then
+    echo "::error::Missing 'metadatablock.displayFacet=...' in $PROPERTIES_FILE"
+    FAIL=1
+  fi
+
+  # Check dataset fields
+  for FIELD in $(grep -a -A1000 "^#datasetField" "$MDB" | tail -n+2 | grep -a -B1000 "^#controlledVocabulary" | head -n-1 | cut -f2); do
+    for ENTRY in title description watermark; do
+      if ! grep -a -q -e "^datasetfieldtype.$FIELD.$ENTRY=" "$PROPERTIES_FILE"; then
+        echo "::error::Missing key 'datasetfieldtype.$FIELD.$ENTRY=...' in $PROPERTIES_FILE"
+        FAIL=1
+      fi
+    done
+  done
+
+  # Check CV entries (everything after the "#controlledVocabulary" header line; sed is used
+  # instead of "grep -A1000" so vocabularies longer than 1000 lines are not silently truncated)
+  while read -r LINE; do
+    FIELD_NAME=$(echo "$LINE" | cut -f1)
+    FIELD_VALUE=$(echo "$LINE" | cut -f2 | tr '[:upper:]' '[:lower:]' | tr " " "_" | "$STRIP_BIN" )
+
+    if ! grep -q -a -e "^controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=" "$PROPERTIES_FILE"; then
+      echo "::error::Missing key 'controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=...' in $PROPERTIES_FILE"
+      FAIL=1
+    fi
+  done < <(sed -n '/^#controlledVocabulary/,$p' "$MDB" | tail -n+2)
+
+  echo "::endgroup::"
+
+done < <( find "$(git rev-parse --show-cdup)scripts/api/data/metadatablocks" -name '*.tsv' -print0 )
+
+if [ "$FAIL" -eq 1 ]; then
+  exit 1
+fi