Skip to content

Add JSON schema validator #468

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 16, 2023
2 changes: 2 additions & 0 deletions metafacture-json/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ dependencies {
implementation 'com.fasterxml.jackson.core:jackson-core:2.13.0'
implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.0'
implementation 'com.jayway.jsonpath:json-path:2.6.0'
implementation 'com.github.erosb:everit-json-schema:1.14.1'
testImplementation 'junit:junit:4.12'
testImplementation 'org.mockito:mockito-core:2.5.5'
testImplementation 'com.github.tomakehurst:wiremock-jre8:2.33.2'
testRuntimeOnly 'org.slf4j:slf4j-simple:1.7.21'
}
194 changes: 194 additions & 0 deletions metafacture-json/src/main/java/org/metafacture/json/JsonValidator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/*
* Copyright 2021, 2023 Fabian Steeg, hbz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.metafacture.json;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultObjectPipe;

import org.everit.json.schema.Schema;
import org.everit.json.schema.ValidationException;
import org.everit.json.schema.loader.SchemaClient;
import org.everit.json.schema.loader.SchemaLoader;
import org.everit.json.schema.loader.SchemaLoader.SchemaLoaderBuilder;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Validates JSON records against a JSON schema and passes only valid records
 * on to the receiver.
 * <p>
 * The schema location given to the constructor is first read as a URL; if that
 * fails with an {@link IOException}, it is loaded as a class path resource.
 * Valid and invalid records can additionally be written to files, one record
 * per line, encoded as UTF-8.
 *
 * @author Fabian Steeg (fsteeg)
 */
@Description("Validate JSON against a given schema, send only valid input to the receiver. Pass the schema location to validate against. " +
        "Write valid and/or invalid output to locations specified with `writeValid` and `writeInvalid`. " +
        "Set the JSON key for the record ID value with `idKey` (for logging output, defaults to `id`).")
@In(String.class)
@Out(String.class)
@FluxCommand("validate-json")
public final class JsonValidator extends DefaultObjectPipe<String, ObjectReceiver<String>> {

    private static final Logger LOG = LoggerFactory.getLogger(JsonValidator.class);
    private static final String DEFAULT_ID_KEY = "id";

    private Schema schema;
    // Counters reported in the debug log when the stream is closed.
    private long fail;
    private long success;
    // Optional output files; null means "do not write".
    private Writer writeInvalid;
    private Writer writeValid;
    private String idKey = DEFAULT_ID_KEY;

    /**
     * Creates a validator and loads the schema right away.
     *
     * @param url The URL of the schema to validate against.
     * @throws MetafactureException if the schema can neither be read as a URL
     *                              nor from the class path, or is not valid JSON
     */
    public JsonValidator(final String url) {
        initSchema(url);
    }

    /**
     * @param writeValid The location to write valid data to.
     * @throws MetafactureException if the location cannot be opened for writing
     */
    public void setWriteValid(final String writeValid) {
        this.writeValid = fileWriter(writeValid);
    }

    /**
     * @param writeInvalid The location to write invalid data to.
     * @throws MetafactureException if the location cannot be opened for writing
     */
    public void setWriteInvalid(final String writeInvalid) {
        this.writeInvalid = fileWriter(writeInvalid);
    }

    /**
     * @param idKey The JSON key for the record ID value.
     */
    public void setIdKey(final String idKey) {
        this.idKey = idKey;
    }

    /**
     * Parses and validates a single JSON record. Records that cannot be parsed
     * are treated as invalid, just like records that violate the schema.
     *
     * @param json the JSON record to validate
     */
    @Override
    public void process(final String json) {
        try {
            validate(json, new JSONObject(json) /* throws JSONException on syntax error */);
        }
        catch (final JSONException e) {
            handleInvalid(json, null, e.getMessage());
        }
    }

    /**
     * Validates the parsed record; forwards, counts and writes it if valid,
     * otherwise hands it over to {@link #handleInvalid}.
     */
    private void validate(final String json, final JSONObject object) {
        try {
            schema.validate(object); // throws ValidationException if invalid
            getReceiver().process(json);
            ++success;
            write(json, writeValid);
        }
        catch (final ValidationException e) {
            handleInvalid(json, object, e.getAllMessages().toString());
        }
    }

    /**
     * Closes the output files and logs the success/fail counters.
     */
    @Override
    protected void onCloseStream() {
        // Nested finally blocks: a failure to close one writer must neither
        // leak the other one nor skip the superclass clean-up.
        try {
            close(writeInvalid);
        }
        finally {
            try {
                close(writeValid);
            }
            finally {
                LOG.debug("Success: {}, Fail: {}", success, fail);
                super.onCloseStream();
            }
        }
    }

    /**
     * Loads the schema from the given location: first as a URL, then (if that
     * fails with an IOException) as a class path resource.
     */
    private void initSchema(final String schemaUrl) {
        SchemaLoaderBuilder schemaLoader = SchemaLoader.builder();
        try {
            final URL url = new URL(schemaUrl);
            schemaLoader = schemaLoader.schemaJson(jsonFrom(url.openStream()))
                    .resolutionScope(baseFor(url.toString()));
        }
        catch (final IOException e) {
            LOG.info("Could not read as URL: {}, trying to load from class path", schemaUrl);
            final InputStream resource = getClass().getResourceAsStream(schemaUrl);
            if (resource == null) {
                // Fail with the pipeline's own exception type instead of a
                // NullPointerException from the JSON tokenizer.
                throw new MetafactureException(
                        "Could not load schema from URL or class path: " + schemaUrl, e);
            }
            schemaLoader = schemaLoader.schemaClient(SchemaClient.classPathAwareClient())
                    .schemaJson(jsonFrom(resource))
                    .resolutionScope("classpath://" + baseFor(schemaUrl));
        }
        schema = schemaLoader.build().load().build();
    }

    /**
     * Reads a JSON object from the stream and closes the stream afterwards.
     *
     * @throws MetafactureException if the content is not a valid JSON object
     */
    private JSONObject jsonFrom(final InputStream inputStream) {
        try {
            return new JSONObject(new JSONTokener(inputStream));
        }
        catch (final JSONException e) {
            throw new MetafactureException(e.getMessage(), e);
        }
        finally {
            try {
                inputStream.close();
            }
            catch (final IOException e) {
                // The content has been consumed (or parsing already failed);
                // a failing close must not mask that outcome.
                LOG.warn("Could not close schema input stream", e);
            }
        }
    }

    /**
     * Returns everything up to and including the last '/' of the path (the
     * empty string if the path contains no '/').
     */
    private String baseFor(final String path) {
        return path.substring(0, path.lastIndexOf('/') + 1);
    }

    /**
     * Opens the given location for writing, always encoding as UTF-8 rather
     * than the platform default charset.
     */
    private Writer fileWriter(final String fileLocation) {
        try {
            return new OutputStreamWriter(new FileOutputStream(fileLocation), StandardCharsets.UTF_8);
        }
        catch (final IOException e) {
            throw new MetafactureException(e.getMessage(), e);
        }
    }

    /**
     * Logs and counts an invalid record and writes it to the invalid output,
     * if configured. The log shows the value found under the ID key, or the
     * raw input if the record could not be parsed (object is null).
     */
    private void handleInvalid(final String json, final JSONObject object,
            final String errorMessage) {
        LOG.info("Invalid JSON: {} in {}", errorMessage, object != null ? object.opt(idKey) : json);
        ++fail;
        write(json, writeInvalid);
    }

    /**
     * Appends the record and a newline to the writer; does nothing if the
     * writer is null.
     */
    private void write(final String json, final Writer writer) {
        if (writer != null) {
            try {
                writer.append(json);
                writer.append("\n");
            }
            catch (final IOException e) {
                throw new MetafactureException(e.getMessage(), e);
            }
        }
    }

    /**
     * Closes the writer; does nothing if the writer is null.
     */
    private void close(final Writer writer) {
        if (writer != null) {
            try {
                writer.close();
            }
            catch (final IOException e) {
                throw new MetafactureException(e.getMessage(), e);
            }
        }
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
#
encode-json org.metafacture.json.JsonEncoder
decode-json org.metafacture.json.JsonDecoder
validate-json org.metafacture.json.JsonValidator
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
* Copyright 2021, 2023 Fabian Steeg, hbz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.metafacture.json;

import static org.hamcrest.CoreMatchers.both;
import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertThat;
import static com.github.tomakehurst.wiremock.client.WireMock.stubFor;
import static com.github.tomakehurst.wiremock.client.WireMock.request;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.ObjectReceiver;
import org.mockito.InOrder;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.MockitoAnnotations;

import com.github.tomakehurst.wiremock.client.WireMock;
import com.github.tomakehurst.wiremock.core.WireMockConfiguration;
import com.github.tomakehurst.wiremock.junit.WireMockRule;


/**
 * Tests for {@link JsonValidator}.
 * <p>
 * Every test runs three times, with the schema location passed as a class
 * path resource, as a file URL and as an HTTP URL served by WireMock.
 *
 * @author Fabian Steeg
 *
 */
@RunWith(Parameterized.class)
public final class JsonValidatorTest {

    private static final String MAIN_SCHEMA = "/schemas/schema.json";
    private static final String ID_SCHEMA = "/schemas/id.json";
    private static final String JSON_VALID = "{\"id\":\"http://example.org/\"}";
    private static final String JSON_INVALID_MISSING_REQUIRED = "{}";
    private static final String JSON_INVALID_URI_FORMAT= "{\"id\":\"example.org/\"}";
    private static final String JSON_INVALID_DUPLICATE_KEY = "{\"id\":\"val\",\"id\":\"val\"}";
    private static final String JSON_INVALID_SYNTAX_ERROR = "{\"id1\":\"val\",\"id2\":\"val\"";

    private JsonValidator validator;

    @Mock
    private ObjectReceiver<String> receiver;
    private InOrder inOrder;
    // Resolves the schema location lazily, since the HTTP variant needs the
    // running WireMock rule to know its base URL.
    private final Function<Object, String> schemaLocationGetter;

    @Rule
    public WireMockRule wireMockRule = new WireMockRule(WireMockConfiguration.wireMockConfig()
            .jettyAcceptors(Runtime.getRuntime().availableProcessors()).dynamicPort());

    /**
     * @return one schema location getter per way of addressing the schema
     */
    @Parameterized.Parameters(name = "{index}")
    public static Collection<Object[]> siteMaps() {
        return Arrays.asList((Object[][]) (new Function[][] { //
                // Pass the schema to each test as path on classpath, file url, and http url:
                { (Object rule) -> MAIN_SCHEMA },
                { (Object rule) -> JsonValidatorTest.class.getResource(MAIN_SCHEMA).toString() },
                { (Object rule) -> ((WireMockRule) rule).baseUrl() + MAIN_SCHEMA } }));
    }

    public JsonValidatorTest(Function<Object, String> schemaLocationGetter) {
        this.schemaLocationGetter = schemaLocationGetter;
    }

    /**
     * Stubs the schema files in WireMock and creates the validator under test
     * with the parameterized schema location.
     */
    @Before
    public void setup() throws IOException {
        MockitoAnnotations.initMocks(this);
        wireMock(MAIN_SCHEMA, ID_SCHEMA);
        String schemaLocation = schemaLocationGetter.apply(wireMockRule);
        validator = new JsonValidator(schemaLocation);
        validator.setReceiver(receiver);
        inOrder = Mockito.inOrder(receiver);
    }

    /**
     * Serves each given class path resource under the same path via WireMock.
     */
    private void wireMock(final String... schemaLocations) throws IOException {
        for (String schemaLocation : schemaLocations) {
            stubFor(request("GET", WireMock.urlEqualTo(schemaLocation)).willReturn(
                    WireMock.ok().withBody(readToString(getClass().getResource(schemaLocation)))
                            .withHeader("Content-type", "application/json")));
        }
    }

    /**
     * Reads the content behind the URL as UTF-8 text, with lines joined by a
     * newline character. The underlying stream is closed before returning.
     */
    private String readToString(final URL url) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
            return reader.lines().collect(Collectors.joining("\n"));
        }
    }

    @Test
    public void callWireMockSchema() throws MalformedURLException, IOException {
        final String schemaContent = readToString(new URL(wireMockRule.baseUrl() + MAIN_SCHEMA));
        assertThat(schemaContent, both(containsString("$schema")).and(containsString("$ref")));
    }

    @Test
    public void testShouldValidate() {
        validator.process(JSON_VALID);
        inOrder.verify(receiver, Mockito.calls(1)).process(JSON_VALID);
    }

    @Test
    public void testShouldInvalidateMissingRequired() {
        validator.process(JSON_INVALID_MISSING_REQUIRED);
        inOrder.verifyNoMoreInteractions();
    }

    @Test
    public void testShouldInvalidateUriFormat() {
        validator.process(JSON_INVALID_URI_FORMAT);
        inOrder.verifyNoMoreInteractions();
    }

    @Test
    public void testShouldInvalidateDuplicateKey() {
        validator.process(JSON_INVALID_DUPLICATE_KEY);
        inOrder.verifyNoMoreInteractions();
    }

    @Test
    public void testShouldInvalidateSyntaxError() {
        validator.process(JSON_INVALID_SYNTAX_ERROR);
        inOrder.verifyNoMoreInteractions();
    }

    @Test(expected = MetafactureException.class)
    public void testShouldCatchMissingSchemaFile() {
        new JsonValidator("").process("{}");
    }

    @Test(expected = MetafactureException.class)
    public void testShouldCatchMissingValidOutputFile() {
        validator.setWriteValid("");
        validator.process(JSON_INVALID_MISSING_REQUIRED);
    }

    @Test(expected = MetafactureException.class)
    public void testShouldCatchMissingInvalidOutputFile() {
        validator.setWriteInvalid("");
        validator.process(JSON_INVALID_MISSING_REQUIRED);
    }

    @After
    public void cleanup() {
        validator.closeStream();
    }

}
8 changes: 8 additions & 0 deletions metafacture-json/src/test/resources/schemas/id.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"$id": "id.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "URL",
"description": "The URL/URI of the resource",
"type": "string",
"format": "uri"
}
Loading