Skip to content

Commit

Permalink
Enable, suggest, and demonstrate mimeType values
Browse files Browse the repository at this point in the history
This patch adds a class hierarchy to distinguish between known IANA
Media Types and Media Types known to not be registered with IANA.

A unit test is added to demonstrate how representing mimeType values as
objects can also enable hierarchical searches, even between IANA and
non-IANA types.

A follow-on patch will generate validation result files.

References:
* #363

Signed-off-by: Alex Nelson <[email protected]>
  • Loading branch information
ajnelson-nist committed May 4, 2022
1 parent 5c15b29 commit d75d307
Show file tree
Hide file tree
Showing 7 changed files with 360 additions and 1 deletion.
30 changes: 29 additions & 1 deletion ontology/observable/observable.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix types: <https://ontology.unifiedcyberontology.org/uco/types/> .
@prefix vocabulary: <https://ontology.unifiedcyberontology.org/uco/vocabulary/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
Expand Down Expand Up @@ -10947,8 +10948,35 @@ observable:mimeClass
observable:mimeType
a owl:ObjectProperty ;
rdfs:label "mimeType"@en ;
rdfs:comment "MIME type of the data. For interoperability with non-UCO resources, dcterms:FileFormat is this property's required range."@en ;
rdfs:comment "MIME type of the data. The text form of '${type}/${subtype}' (for example, 'text/html' or 'audio/mp3') can be used to find a UCO-provisioned set of types from the UCO Media Types Taxonomy. For interoperability with non-UCO resources, dcterms:FileFormat is this property's required range. The more-specific UCO classes types:IANAMediaType or types:NonIANAMediaType should be used when available."@en ;
rdfs:range dcterms:FileFormat ;
rdfs:seeAlso
types:IANAMediaType ,
types:NonIANAMediaType ,
<https://taxonomy.unifiedcyberontology.org/uco/mime/>
;
.

# Informational shape: for each subject that uses observable:mimeType, the
# property's values are checked (sh:class) to be instances of
# types:MIMEFormat. Non-conforming values are reported at sh:Info severity.
observable:mimeType-class-types-MIMEFormat
a sh:PropertyShape ;
rdfs:comment "This shape is given an IRI in order to facilitate deactivation on request."@en ;
rdfs:seeAlso sh:deactivated ;
sh:class types:MIMEFormat ;
sh:message "Value is not an instance of types:MIMEFormat or one of its subclasses. Please consider using a value having type types:IANAMediaType or types:NonIANAMediaType."@en ;
sh:path observable:mimeType ;
sh:severity sh:Info ;
sh:targetSubjectsOf observable:mimeType ;
.

# Warning-level shape: every node that appears as an object of
# observable:mimeType is expected to carry at least one skos:notation value
# (sh:minCount 1 on the skos:notation path).
observable:mimeType-notation
a sh:PropertyShape ;
rdfs:comment "This shape is intended to be deactivated by using overriding shapes in the UCO Mime Taxonomy."@en ;
rdfs:seeAlso <https://taxonomy.unifiedcyberontology.org/uco/mime/> ;
sh:message "The used mimeType value is not a concept with a skos:notation. Some consumers of this data expect a string with the MIME 'type/subtype' format to be supplied with skos:notation. Please either add the skos:notation to your input graph or incorporate the UCO MIME Taxonomy."@en ;
sh:minCount "1"^^xsd:integer ;
sh:path skos:notation ;
sh:severity sh:Warning ;
sh:targetObjectsOf observable:mimeType ;
.

observable:minorImageVersion
Expand Down
27 changes: 27 additions & 0 deletions ontology/types/types.ttl
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# imports: https://ontology.unifiedcyberontology.org/uco/core
# imports: https://ontology.unifiedcyberontology.org/uco/vocabulary

@prefix core: <https://ontology.unifiedcyberontology.org/uco/core/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix observable: <https://ontology.unifiedcyberontology.org/uco/observable/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
Expand Down Expand Up @@ -160,16 +162,41 @@ types:Hash
sh:targetClass types:Hash ;
.

# Class for Media Types known to be registered with IANA. It is a subclass of
# types:MIMEFormat and is declared disjoint with types:NonIANAMediaType.
types:IANAMediaType
a owl:Class ;
rdfs:subClassOf types:MIMEFormat ;
rdfs:label "IANAMediaType"@en ;
owl:disjointWith types:NonIANAMediaType ;
.

types:Identifier
a rdfs:Datatype ;
rdfs:comment "An identifier is a string conformant to the specified UUID-based format for UCO object identifiers."@en ;
.

# Intermediary class for MIME types: a subclass of both dcterms:FileFormat
# and core:TaxonomicConcept. See the rdfs:comment below for its intended use.
types:MIMEFormat
a owl:Class ;
rdfs:subClassOf
dcterms:FileFormat ,
core:TaxonomicConcept
;
rdfs:label "MIMEFormat"@en ;
rdfs:comment "This class should be considered an intermediary, 'abstract' class, and should not have individuals defined that are not a member of one of its subclasses. This class has two purposes. First, to encode for interoperability with existing frameworks, without relying on RDFS inference, that a UCO MIMEFormat individual is both a SKOS Concept (via core:TaxonomicConcept) and Dublin Core Terms FileFormat. Second, to guarantee a concept to be used as a UCO MIME type has exactly one skos:notation. Note that strict SHACL enforcement of the skos:notation presence is not within the UCO Types ontology, but is instead a part of the shapes provided by the UCO MIME Taxonomy. The warning about skos:notation in the Observable namespace is deactivated on importing the taxonomy's shapes."@en ;
rdfs:seeAlso observable:mimeType-notation ;
.

types:NativeFormatString
a rdfs:Datatype ;
rdfs:comment "Specifies data in its native format of some external language. The data may be encoded in Base64 per [RFC4648]. Data encoded in Base64 must be denoted as such using the encoded property."@en ;
.

# Class for Media Types known not to be registered with IANA. It is a subclass
# of types:MIMEFormat and is declared disjoint with types:IANAMediaType.
types:NonIANAMediaType
a owl:Class ;
rdfs:subClassOf types:MIMEFormat ;
rdfs:label "NonIANAMediaType"@en ;
owl:disjointWith types:IANAMediaType ;
.

types:StructuredText
a rdfs:Datatype ;
rdfs:comment "Expresses string-based data in some information structuring format (e.g., HTML5)."@en ;
Expand Down
4 changes: 4 additions & 0 deletions tests/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ all: \
hash_XFAIL_validation.ttl \
location_PASS_validation.ttl \
location_XFAIL_validation.ttl \
mime_PASS_validation.ttl \
mime_XFAIL_validation.ttl \
relationship_PASS_validation.ttl \
relationship_XFAIL_validation.ttl

Expand Down Expand Up @@ -74,6 +76,8 @@ check: \
hash_XFAIL_validation.ttl \
location_PASS_validation.ttl \
location_XFAIL_validation.ttl \
mime_PASS_validation.ttl \
mime_XFAIL_validation.ttl \
relationship_PASS_validation.ttl \
relationship_XFAIL_validation.ttl
source $(tests_srcdir)/venv/bin/activate \
Expand Down
152 changes: 152 additions & 0 deletions tests/examples/mime_PASS.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
{
"@context": {
"core": "https://ontology.unifiedcyberontology.org/uco/core/",
"dcterms": "http://purl.org/dc/terms/",
"kb": "http://example.org/kb/",
"observable": "https://ontology.unifiedcyberontology.org/uco/observable/",
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
"skos": "http://www.w3.org/2004/02/skos/core#",
"types": "https://ontology.unifiedcyberontology.org/uco/types/"
},
"@graph": [
{
"@id": "urn:example:mime:dcterms:application/gzip",
"@type": "dcterms:FileFormat",
"rdfs:comment": "This is a custom media type individual, designed for just this unit test. A larger taxonomy of individuals should be used outside of testing contexts.",
"skos:notation": "application/gzip"
},
{
"@id": "urn:example:mime:uco:application/gzip",
"@type": "types:IANAMediaType",
"rdfs:comment": "This is a custom media type individual, designed for just this unit test. A larger taxonomy of individuals should be used outside of testing contexts.",
"skos:exactMatch": {
"@id": "urn:example:mime:dcterms:application/gzip"
},
"skos:notation": "application/gzip"
},
{
"@id": "urn:example:mime:uco:application/tar",
"@type": "types:NonIANAMediaType",
"rdfs:comment": "This is a custom media type individual, designed for just this unit test. A larger taxonomy of individuals should be used outside of testing contexts.",
"skos:notation": "application/tar"
},
{
"@id": "urn:example:mime:uco:application/tar+gzip",
"@type": "types:NonIANAMediaType",
"rdfs:comment": "This is a custom media type individual, designed for just this unit test. A larger taxonomy of individuals should be used outside of testing contexts.",
"skos:broader": [
{
"@id": "urn:example:mime:uco:application/gzip"
},
{
"@id": "urn:example:mime:uco:application/tar"
}
],
"skos:notation": "application/tar+gzip"
},
{
"@id": "urn:example:mime:uco:image/example.image.type.without.notation",
"@type": "types:NonIANAMediaType",
"rdfs:comment": "This is a custom media type individual, designed for just this unit test. A larger taxonomy of individuals should be used outside of testing contexts."
},
{
"@id": "kb:file-1",
"@type": "observable:File",
"core:hasFacet": [
{
"@type": "observable:ContentDataFacet",
"observable:mimeType": {
"@id": "urn:example:mime:dcterms:application/gzip"
}
},
{
"@type": "observable:FileFacet",
"observable:fileName": "1.gz"
}
],
"rdfs:comment": "This file node should trigger an sh:Info-severity result."
},
{
"@id": "kb:file-2",
"@type": "observable:File",
"core:hasFacet": [
{
"@type": "observable:ContentDataFacet",
"observable:mimeType": {
"@id": "urn:example:mime:uco:application/gzip"
}
},
{
"@type": "observable:FileFacet",
"observable:fileName": "2.gz"
}
]
},
{
"@id": "kb:file-3",
"@type": "observable:File",
"core:hasFacet": [
{
"@type": "observable:ContentDataFacet",
"observable:mimeType": {
"@id": "urn:example:mime:uco:application/tar"
}
},
{
"@type": "observable:FileFacet",
"observable:fileName": "3.tar"
}
]
},
{
"@id": "kb:file-4",
"@type": "observable:File",
"core:hasFacet": [
{
"@type": "observable:ContentDataFacet",
"observable:mimeType": {
"@id": "urn:example:mime:uco:application/tar+gzip"
}
},
{
"@type": "observable:FileFacet",
"observable:fileName": "4.tar.gz"
}
]
},
{
"@id": "kb:file-5",
"@type": "observable:File",
"core:hasFacet": [
{
"@type": "observable:ContentDataFacet",
"observable:mimeType": {
"@id": "urn:example:mime:uco:image/example.image.type.without.notation"
}
},
{
"@type": "observable:FileFacet",
"observable:fileName": "5.dat"
}
],
"rdfs:comment": "This file node should trigger an sh:Warning-severity result."
},
{
"@id": "kb:file-6",
"@type": "observable:File",
"core:hasFacet": [
{
"@type": "observable:ContentDataFacet",
"observable:mimeType": {
"@id": "urn:example:mime:uco:image/example.image.type.without.notation"
}
},
{
"@type": "observable:FileFacet",
"observable:fileName": "6.dat"
}
],
"rdfs:comment": "This file node should trigger an sh:Warning-severity result."
}
]
}
28 changes: 28 additions & 0 deletions tests/examples/mime_XFAIL.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"@context": {
"core": "https://ontology.unifiedcyberontology.org/uco/core/",
"dcterms": "http://purl.org/dc/terms/",
"kb": "http://example.org/kb/",
"observable": "https://ontology.unifiedcyberontology.org/uco/observable/",
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
"skos": "http://www.w3.org/2004/02/skos/core#",
"types": "https://ontology.unifiedcyberontology.org/uco/types/"
},
"@graph": [
{
"@id": "kb:file-7",
"@type": "observable:File",
"core:hasFacet": [
{
"@type": "observable:ContentDataFacet",
"observable:mimeType": "application/gzip"
},
{
"@type": "observable:FileFacet",
"observable:fileName": "7.gz"
}
],
"rdfs:comment": "This file node errantly uses a literal instead of object reference."
}
]
}
98 changes: 98 additions & 0 deletions tests/examples/test_mime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env python3

# This software was developed at the National Institute of Standards
# and Technology by employees of the Federal Government in the course
# of their official duties. Pursuant to title 17 Section 105 of the
# United States Code this software is not subject to copyright
# protection and is in the public domain. NIST assumes no
# responsibility whatsoever for its use by other parties, and makes
# no guarantees, expressed or implied, about its quality,
# reliability, or any other characteristic.
#
# We would appreciate acknowledgement if the software is used.

import typing

import pytest
import rdflib

# IRI namespaces of the UCO ontologies referenced by the queries below.
NS_UCO_CORE = rdflib.Namespace(
    "https://ontology.unifiedcyberontology.org/uco/core/"
)
NS_UCO_OBSERVABLE = rdflib.Namespace(
    "https://ontology.unifiedcyberontology.org/uco/observable/"
)

# Prefix-to-namespace bindings passed to prepareQuery through ``initNs`` so
# the SPARQL text can use the core:, observable:, and skos: prefixes.
NSDICT = {
    "core": NS_UCO_CORE,
    "observable": NS_UCO_OBSERVABLE,
    "skos": rdflib.SKOS,
}

@pytest.fixture
def mime_pass_graph() -> rdflib.Graph:
    """Return a graph parsed from the ``mime_PASS.json`` JSON-LD example.

    The data file is located relative to this test module rather than the
    current working directory, so the fixture works no matter which
    directory pytest is launched from.
    """
    import pathlib

    data_path = pathlib.Path(__file__).resolve().parent / "mime_PASS.json"
    graph = rdflib.Graph()
    graph.parse(str(data_path), format="json-ld")
    return graph


def test_mime_file_names(mime_pass_graph: rdflib.Graph) -> None:
    """Check that every file name in the example graph is found.

    The query follows ``core:hasFacet/observable:fileName`` from any node
    and collects the resulting literals; no MIME-type filtering is applied.
    """
    # Import the SPARQL submodule explicitly: ``import rdflib`` by itself
    # does not guarantee that ``rdflib.plugins.sparql`` has been loaded.
    from rdflib.plugins.sparql import prepareQuery

    expected: typing.Set[str] = {
        "1.gz",
        "2.gz",
        "3.tar",
        "4.tar.gz",
        "5.dat",
        "6.dat",
    }

    query = prepareQuery("""\
SELECT ?lFileName
WHERE {
?nFile core:hasFacet/observable:fileName ?lFileName .
}
""", initNs=NSDICT)
    # Each result row has a single binding, ?lFileName.
    computed: typing.Set[str] = {
        str(result[0]) for result in mime_pass_graph.query(query)
    }
    assert expected == computed


def test_mime_gzip_files(mime_pass_graph: rdflib.Graph) -> None:
    """Check which files are found when searching for "application/gzip".

    A file matches when its ``observable:mimeType`` value reaches a node with
    ``skos:notation "application/gzip"`` through zero or more
    ``skos:exactMatch`` links followed by zero or more ``skos:broader``
    links. This lets one query find files typed with the directly matching
    individual as well as files typed with a narrower concept.
    """
    # Import the SPARQL submodule explicitly: ``import rdflib`` by itself
    # does not guarantee that ``rdflib.plugins.sparql`` has been loaded.
    from rdflib.plugins.sparql import prepareQuery

    expected: typing.Set[str] = {
        "1.gz",
        "2.gz",
        "4.tar.gz",
    }

    query = prepareQuery("""\
SELECT ?lFileName
WHERE {
?nFile core:hasFacet/observable:fileName ?lFileName .
?nFile core:hasFacet/observable:mimeType ?nMimeType .
?nMimeType skos:exactMatch*/skos:broader*/skos:notation "application/gzip" .
}
""", initNs=NSDICT)
    # Each result row has a single binding, ?lFileName.
    computed: typing.Set[str] = {
        str(result[0]) for result in mime_pass_graph.query(query)
    }
    assert expected == computed



def test_mime_tar_files(mime_pass_graph: rdflib.Graph) -> None:
    """Check which files are found when searching for "application/tar".

    A file matches when its ``observable:mimeType`` value reaches a node with
    ``skos:notation "application/tar"`` through zero or more
    ``skos:exactMatch`` links followed by zero or more ``skos:broader``
    links.
    """
    # Import the SPARQL submodule explicitly: ``import rdflib`` by itself
    # does not guarantee that ``rdflib.plugins.sparql`` has been loaded.
    from rdflib.plugins.sparql import prepareQuery

    expected: typing.Set[str] = {
        "3.tar",
        "4.tar.gz",
    }

    query = prepareQuery("""\
SELECT ?lFileName
WHERE {
?nFile core:hasFacet/observable:fileName ?lFileName .
?nFile core:hasFacet/observable:mimeType ?nMimeType .
?nMimeType skos:exactMatch*/skos:broader*/skos:notation "application/tar" .
}
""", initNs=NSDICT)
    # Each result row has a single binding, ?lFileName.
    computed: typing.Set[str] = {
        str(result[0]) for result in mime_pass_graph.query(query)
    }
    assert expected == computed
Loading

0 comments on commit d75d307

Please sign in to comment.