Skip to content

Commit f7bdb7d

Browse files
author
Horia Chiorean
committed
MODE-2684 Removes the compile-time dependency of modeshape-core on Apache Tika
Mime type detection works exactly as before when Tika is present. When Tika is not on the classpath at runtime, an independent, extension-based default detector is used instead.
1 parent df71159 commit f7bdb7d

File tree

19 files changed

+1160
-46
lines changed

19 files changed

+1160
-46
lines changed

deploy/jbossas/kit/jboss-wf/org/modeshape/main/module.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
<!-- for the DB binary store -->
3838
<module name="org.jgroups" slot="${jgroups.module.slot}"/>
3939

40-
<module name="org.apache.tika" slot="${version.org.apache.tika}" services="import"/>
40+
<module name="org.apache.tika" slot="${version.org.apache.tika}" services="import" optional="true"/>
4141

4242
<!-- For naming ... -->
4343
<module name="javax.api" export="true"/>

extractors/modeshape-extractor-tika/src/test/java/org/modeshape/extractor/tika/TikaTextExtractorTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
import org.modeshape.common.util.IoUtil;
4242
import org.modeshape.jcr.InMemoryTestBinary;
4343
import org.modeshape.jcr.LocalEnvironment;
44-
import org.modeshape.jcr.mimetype.ContentDetector;
44+
import org.modeshape.jcr.mimetype.tika.TikaContentDetector;
4545
import org.modeshape.jcr.mimetype.MimeTypeDetector;
4646
import org.modeshape.jcr.text.TextExtractorContext;
4747
import org.modeshape.jcr.text.TextExtractorOutput;
@@ -51,7 +51,7 @@
5151
*/
5252
public class TikaTextExtractorTest {
5353

54-
private static final MimeTypeDetector DETECTOR = new ContentDetector(new LocalEnvironment());
54+
private static final MimeTypeDetector DETECTOR = new TikaContentDetector(new LocalEnvironment());
5555
private static final int DEFAULT_TIKA_WRITE_LIMIT = 100000;
5656
private static final String CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
5757
private static final Random RANDOM = new Random();

modeshape-jcr/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
<dependency>
6666
<groupId>org.apache.tika</groupId>
6767
<artifactId>tika-core</artifactId>
68+
<scope>provided</scope>
6869
</dependency>
6970
<dependency>
7071
<groupId>org.apache.tika</groupId>

modeshape-jcr/src/main/java/org/modeshape/jcr/RepositoryConfiguration.java

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,8 @@
5454
import org.modeshape.jcr.api.index.IndexDefinition.IndexKind;
5555
import org.modeshape.jcr.api.txn.TransactionManagerLookup;
5656
import org.modeshape.jcr.index.local.LocalIndexProvider;
57-
import org.modeshape.jcr.mimetype.ContentDetector;
5857
import org.modeshape.jcr.mimetype.MimeTypeDetector;
59-
import org.modeshape.jcr.mimetype.NameOnlyDetector;
60-
import org.modeshape.jcr.mimetype.NullMimeTypeDetector;
58+
import org.modeshape.jcr.mimetype.MimeTypeDetectors;
6159
import org.modeshape.jcr.security.AnonymousProvider;
6260
import org.modeshape.jcr.security.JaasProvider;
6361
import org.modeshape.jcr.txn.DefaultTransactionManagerLookup;
@@ -1236,23 +1234,11 @@ public BinaryStore getBinaryStore() throws Exception {
12361234
public String getType() {
12371235
return binaryStorage.getString(FieldName.TYPE, FieldValue.BINARY_STORAGE_TYPE_TRANSIENT);
12381236
}
1239-
1237+
12401238
protected MimeTypeDetector getMimeTypeDetector(Environment environment) {
1241-
String mimeTypeDetection = binaryStorage.getString(FieldName.MIMETYPE_DETECTION, FieldValue.MIMETYPE_DETECTION_CONTENT);
1242-
switch (mimeTypeDetection.toLowerCase()) {
1243-
case FieldValue.MIMETYPE_DETECTION_CONTENT: {
1244-
return new ContentDetector(environment);
1245-
}
1246-
case FieldValue.MIMETYPE_DETECTION_NAME: {
1247-
return new NameOnlyDetector(environment);
1248-
}
1249-
case FieldValue.MIMETYPE_DETECTION_NONE: {
1250-
return NullMimeTypeDetector.INSTANCE;
1251-
}
1252-
default: {
1253-
throw new IllegalArgumentException("Unknown mime-type detector setting: " + mimeTypeDetection);
1254-
}
1255-
}
1239+
String mimeTypeDetection = binaryStorage.getString(FieldName.MIMETYPE_DETECTION,
1240+
FieldValue.MIMETYPE_DETECTION_CONTENT);
1241+
return MimeTypeDetectors.createDetectorFor(mimeTypeDetection, environment);
12561242
}
12571243

12581244
/*
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* ModeShape (http://www.modeshape.org)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.modeshape.jcr.mimetype;
18+
19+
import java.io.BufferedReader;
20+
import java.io.IOException;
21+
import java.io.InputStreamReader;
22+
import java.util.HashMap;
23+
import java.util.Map;
24+
import javax.jcr.Binary;
25+
import javax.jcr.RepositoryException;
26+
import org.modeshape.common.util.StringUtil;
27+
import org.modeshape.schematic.annotation.ThreadSafe;
28+
29+
/**
30+
* A {@link MimeTypeDetector} implementation which uses a properties file to load existing mime types and decides based on the
31+
* name of a binary what the mime type is.
32+
*
33+
* @author Horia Chiorean ([email protected])
34+
*/
35+
@ThreadSafe
36+
public final class DefaultMimeTypeDetector implements MimeTypeDetector {
37+
38+
private final Map<String, String> mimeTypes;
39+
40+
public DefaultMimeTypeDetector() {
41+
mimeTypes = loadMimeTypes();
42+
}
43+
44+
private Map<String, String> loadMimeTypes() {
45+
try (BufferedReader reader =
46+
new BufferedReader(new InputStreamReader(DefaultMimeTypeDetector.class.getResourceAsStream("mimetypes.properties")))) {
47+
Map<String, String> result = new HashMap<>();
48+
reader.lines().forEach(line -> {
49+
String[] parts = line.split("\\s");
50+
if (parts.length >= 2) {
51+
String mimeType = parts[0];
52+
for (int i = 1; i < parts.length; i++) {
53+
String extension = parts[i].trim();
54+
if (!StringUtil.isBlank(extension)) {
55+
result.put(extension, mimeType);
56+
}
57+
}
58+
}
59+
});
60+
return result;
61+
} catch (IOException e) {
62+
throw new RuntimeException(e);
63+
}
64+
}
65+
66+
@Override
67+
public String mimeTypeOf(String name, Binary binaryValue) throws RepositoryException, IOException {
68+
if (name == null) {
69+
return null;
70+
}
71+
int lastDotIdx = name.lastIndexOf('.');
72+
String extension = lastDotIdx > 0 && lastDotIdx + 1 < name.length() ? name.substring(lastDotIdx + 1) : name;
73+
return mimeTypes.get(extension);
74+
}
75+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* ModeShape (http://www.modeshape.org)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.modeshape.jcr.mimetype;
18+
19+
import org.modeshape.common.logging.Logger;
20+
import org.modeshape.jcr.Environment;
21+
import org.modeshape.jcr.RepositoryConfiguration;
22+
import org.modeshape.jcr.mimetype.tika.TikaContentDetector;
23+
import org.modeshape.jcr.mimetype.tika.TikaNameOnlyDetector;
24+
25+
/**
26+
* Class which decides which {@link MimeTypeDetector} implementation to use at runtime
27+
*
28+
* @author Horia Chiorean ([email protected])
29+
*/
30+
public final class MimeTypeDetectors {
31+
32+
private static final Logger LOGGER = Logger.getLogger(MimeTypeDetector.class);
33+
private static final boolean TIKA_AVAILABLE;
34+
35+
static {
36+
boolean tikaAvailable = true;
37+
ClassLoader classLoader = MimeTypeDetector.class.getClassLoader();
38+
try {
39+
Class.forName("org.apache.tika.detect.DefaultDetector", true, classLoader);
40+
Class.forName("org.apache.tika.metadata.Metadata", true, classLoader);
41+
} catch (Throwable t) {
42+
tikaAvailable = false;
43+
}
44+
TIKA_AVAILABLE = tikaAvailable;
45+
if (LOGGER.isDebugEnabled()) {
46+
if (TIKA_AVAILABLE) {
47+
LOGGER.debug("Tika is available in the CP; will be used for mimetype detection");
48+
} else {
49+
LOGGER.debug("Tika is not available in the CP; ModeShape will use a DefaultMimetypeDetector");
50+
}
51+
}
52+
}
53+
54+
private MimeTypeDetectors() {
55+
}
56+
57+
/**
58+
* Returns a new mime type detector implementation based on
59+
* the repository {@link org.modeshape.jcr.RepositoryConfiguration.FieldName#MIMETYPE_DETECTION} configuration
60+
*
61+
* @param mimeTypeDetectionConfig a {@code String}, may not be null
62+
* @param environment an {@link Environment} instance specific to a repository
63+
* @return a {@link MimeTypeDetector} instance
64+
*/
65+
public static MimeTypeDetector createDetectorFor(String mimeTypeDetectionConfig, Environment environment) {
66+
switch (mimeTypeDetectionConfig.toLowerCase()) {
67+
case RepositoryConfiguration.FieldValue.MIMETYPE_DETECTION_CONTENT: {
68+
return TIKA_AVAILABLE ? new TikaContentDetector(environment) : new DefaultMimeTypeDetector();
69+
}
70+
case RepositoryConfiguration.FieldValue.MIMETYPE_DETECTION_NAME: {
71+
return TIKA_AVAILABLE ? new TikaNameOnlyDetector(environment) : new DefaultMimeTypeDetector();
72+
}
73+
case RepositoryConfiguration.FieldValue.MIMETYPE_DETECTION_NONE: {
74+
return NullMimeTypeDetector.INSTANCE;
75+
}
76+
default: {
77+
throw new IllegalArgumentException("Unknown mime-type detector setting: " + mimeTypeDetectionConfig);
78+
}
79+
}
80+
}
81+
}

modeshape-jcr/src/main/java/org/modeshape/jcr/mimetype/ContentDetector.java renamed to modeshape-jcr/src/main/java/org/modeshape/jcr/mimetype/tika/TikaContentDetector.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16-
package org.modeshape.jcr.mimetype;
16+
package org.modeshape.jcr.mimetype.tika;
1717

1818
import java.io.InputStream;
1919
import org.apache.tika.detect.DefaultDetector;
@@ -27,17 +27,18 @@
2727
import org.modeshape.common.annotation.ThreadSafe;
2828
import org.modeshape.jcr.Environment;
2929
import org.modeshape.jcr.JcrI18n;
30+
import org.modeshape.jcr.mimetype.MimeTypeDetector;
3031

3132
/**
3233
* {@link MimeTypeDetector} implementation which uses Apache Tika to determine the mimetype of a given binary, based on the
3334
* content (binary) header. This involves reading at least the first X bytes from each binary and is more expensive than
34-
* {@link NameOnlyDetector}
35+
* {@link TikaNameOnlyDetector}
3536
*
3637
* @author Horia Chiorean ([email protected])
3738
**/
3839
@Immutable
3940
@ThreadSafe
40-
public final class ContentDetector extends TikaMimeTypeDetector {
41+
public final class TikaContentDetector extends TikaMimeTypeDetector {
4142

4243
private DefaultDetector detector;
4344

@@ -46,7 +47,7 @@ public final class ContentDetector extends TikaMimeTypeDetector {
4647
*
4748
* @param environment the {@link Environment} to use for class loading; may not be {@code null}
4849
*/
49-
public ContentDetector( Environment environment ) {
50+
public TikaContentDetector(Environment environment) {
5051
super(environment);
5152
}
5253

modeshape-jcr/src/main/java/org/modeshape/jcr/mimetype/TikaMimeTypeDetector.java renamed to modeshape-jcr/src/main/java/org/modeshape/jcr/mimetype/tika/TikaMimeTypeDetector.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16-
package org.modeshape.jcr.mimetype;
16+
package org.modeshape.jcr.mimetype.tika;
1717

1818
import java.io.IOException;
1919
import java.io.InputStream;
@@ -26,6 +26,7 @@
2626
import org.modeshape.common.util.SelfClosingInputStream;
2727
import org.modeshape.common.util.StringUtil;
2828
import org.modeshape.jcr.Environment;
29+
import org.modeshape.jcr.mimetype.MimeTypeDetector;
2930

3031
/**
3132
* A base class for the {@link MimeTypeDetector}s that use the Tika library.

modeshape-jcr/src/main/java/org/modeshape/jcr/mimetype/NameOnlyDetector.java renamed to modeshape-jcr/src/main/java/org/modeshape/jcr/mimetype/tika/TikaNameOnlyDetector.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16-
package org.modeshape.jcr.mimetype;
16+
package org.modeshape.jcr.mimetype.tika;
1717

1818
import java.io.InputStream;
1919
import org.apache.tika.detect.Detector;
@@ -25,6 +25,7 @@
2525
import org.modeshape.common.annotation.ThreadSafe;
2626
import org.modeshape.jcr.Environment;
2727
import org.modeshape.jcr.JcrI18n;
28+
import org.modeshape.jcr.mimetype.MimeTypeDetector;
2829

2930
/**
3031
* {@link MimeTypeDetector} implementation which uses Apache Tika to determine the mimetype of a given binary, based only
@@ -34,7 +35,7 @@
3435
*/
3536
@Immutable
3637
@ThreadSafe
37-
public final class NameOnlyDetector extends TikaMimeTypeDetector {
38+
public final class TikaNameOnlyDetector extends TikaMimeTypeDetector {
3839

3940
private Detector detector;
4041

@@ -43,7 +44,7 @@ public final class NameOnlyDetector extends TikaMimeTypeDetector {
4344
*
4445
* @param environment the {@link Environment} to use for class loading; may not be {@code null}
4546
*/
46-
public NameOnlyDetector( Environment environment ) {
47+
public TikaNameOnlyDetector(Environment environment) {
4748
super(environment);
4849
}
4950

0 commit comments

Comments
 (0)