Skip to content

Commit 296628d

Browse files
author
Wolfgang Walter SAUER
authored
Merge pull request #42 from clarin-eric/vaadin_replacement
Vaadin replacement
2 parents 18f1d68 + b97b870 commit 296628d

File tree

136 files changed

+5156
-19618
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

136 files changed

+5156
-19618
lines changed

curation-module-core/pom.xml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
<parent>
44
<artifactId>curation-module</artifactId>
55
<groupId>eu.clarin.cmdi</groupId>
6-
<version>2.4</version>
6+
<version>3.0</version>
77
</parent>
88
<modelVersion>4.0.0</modelVersion>
99
<artifactId>curation-module-core</artifactId>
@@ -93,7 +93,7 @@
9393
<dependency>
9494
<groupId>eu.clarin.cmdi</groupId>
9595
<artifactId>linkChecker</artifactId>
96-
<version>2.4</version>
96+
<version>3.0</version>
9797
</dependency>
9898
<!--<dependency> -->
9999
<!--<groupId>com.github.mfornos</groupId> -->
@@ -143,5 +143,5 @@
143143
</plugin>
144144
</plugins>
145145
</build>
146-
<version>2.4</version>
146+
<version>3.0</version>
147147
</project>

curation-module-core/src/main/java/eu/clarin/cmdi/curation/cr/CRService.java

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
11
package eu.clarin.cmdi.curation.cr;
22

3+
import java.io.IOException;
4+
import java.io.InputStreamReader;
5+
import java.net.MalformedURLException;
6+
import java.net.URL;
7+
import java.nio.CharBuffer;
38
import java.util.Collection;
49
import java.util.concurrent.ExecutionException;
510
import java.util.regex.Matcher;
@@ -60,11 +65,35 @@ public ProfileHeader createProfileHeader(String schemaLocation, String cmdiVersi
6065

6166
if(header == null){
6267
header = new ProfileHeader();
63-
header.setId(getIdFromSchemaLocation(schemaLocation));
6468
header.setSchemaLocation(schemaLocation);
65-
header.setCmdiVersion(cmdiVersion);
66-
header.setPublic(false);
67-
69+
header.setId(getIdFromSchemaLocation(schemaLocation));
70+
header.setCmdiVersion(cmdiVersion);
71+
header.setPublic(false);
72+
73+
if(header.getId() == null) { // when the id can't be extracted from the schema location we have to get it from the file content
74+
CharBuffer buffer = CharBuffer.allocate(1000);
75+
76+
InputStreamReader reader;
77+
try {
78+
reader = new InputStreamReader(new URL(schemaLocation).openStream());
79+
reader.read(buffer);
80+
String content = buffer.rewind().toString();
81+
82+
Matcher matcher = PROFILE_ID_PATTERN.matcher(content);
83+
84+
if(matcher.find())
85+
header.setId(matcher.group());
86+
87+
if(!content.contains("http://www.clarin.eu/cmd/1"))
88+
header.setCmdiVersion("1.1");
89+
}
90+
catch (MalformedURLException ex) {
91+
_logger.error("schema location " + schemaLocation + " is no valid URL", ex);
92+
}
93+
catch (IOException ex) {
94+
_logger.error("couldn't read from schema location " + schemaLocation, ex);
95+
}
96+
}
6897
}
6998
header.setLocalFile(isLocalFile);
7099
return header;
@@ -117,5 +146,4 @@ public String getIdFromSchemaLocation(String schemaLocation) {
117146

118147
return matcher.find()? matcher.group():null;
119148
}
120-
121149
}

curation-module-core/src/main/java/eu/clarin/cmdi/curation/cr/ProfileCacheFactory.java

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,12 @@
2121
import javax.xml.validation.SchemaFactory;
2222
import java.io.File;
2323
import java.io.IOException;
24+
import java.net.URI;
25+
import java.net.URISyntaxException;
2426
import java.nio.file.Files;
2527
import java.nio.file.Path;
28+
import java.nio.file.Paths;
29+
import java.nio.file.StandardCopyOption;
2630
import java.util.concurrent.TimeUnit;
2731

2832
class ProfileCacheFactory {
@@ -74,7 +78,7 @@ public ProfileCacheLoader(boolean isPublicProfilesCache) {
7478

7579

7680
@Override
77-
public ProfileCacheEntry load(ProfileHeader header) throws IOException, VTDException, SAXException {
81+
public ProfileCacheEntry load(ProfileHeader header) throws IOException, VTDException, SAXException, URISyntaxException {
7882

7983

8084
_logger.info("Profile {} is not in the cache, it will be loaded", header.getId());
@@ -103,6 +107,7 @@ public ProfileCacheEntry load(ProfileHeader header) throws IOException, VTDExcep
103107
}
104108

105109
} else {//non-public profiles are not cached on disk
110+
106111
_logger.debug("schema {} is not public. Schema will be downloaded in temp folder", header.getId());
107112

108113

@@ -125,8 +130,13 @@ public ProfileCacheEntry load(ProfileHeader header) throws IOException, VTDExcep
125130

126131

127132
_logger.info("XSD for the {} is not in the local cache, it will be downloaded", header.getId());
128-
new HTTPLinkChecker(15000, 5, Configuration.USERAGENT).download(header.getSchemaLocation(), xsd.toFile());
129-
133+
134+
if(header.getSchemaLocation().startsWith("file:")) {
135+
Files.move(Paths.get(new URI(header.getSchemaLocation())), xsd, StandardCopyOption.REPLACE_EXISTING);
136+
}
137+
else {
138+
new HTTPLinkChecker(15000, 5, Configuration.USERAGENT).download(header.getSchemaLocation(), xsd.toFile());
139+
}
130140
}
131141
}
132142

curation-module-core/src/main/java/eu/clarin/cmdi/curation/cr/ProfileHeader.java

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ public class ProfileHeader {
1616

1717
private String id;
1818
private String schemaLocation;
19-
private String url;
19+
// private String url;
2020
private String name;
2121
private String description;
2222
private String cmdiVersion;
@@ -122,11 +122,9 @@ public String toString() {
122122
.toString();
123123
}
124124

125-
public String getUrl() {
126-
return url;
127-
}
128-
129-
public void setUrl(String url) {
130-
this.url = url;
131-
}
125+
/*
126+
* public String getUrl() { return url; }
127+
*
128+
* public void setUrl(String url) { this.url = url; }
129+
*/
132130
}

curation-module-core/src/main/java/eu/clarin/cmdi/curation/entities/CMDCollection.java

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,24 @@
11
package eu.clarin.cmdi.curation.entities;
22

33
import java.nio.file.Path;
4-
import java.util.ArrayList;
5-
import java.util.List;
4+
import java.util.ArrayDeque;
5+
import java.util.Deque;
6+
67

78
import eu.clarin.cmdi.curation.processor.AbstractProcessor;
89
import eu.clarin.cmdi.curation.processor.CollectionProcessor;
910

1011
public class CMDCollection extends CurationEntity {
1112

12-
List<CurationEntity> children;
13+
Deque<CurationEntity> children;
1314

1415
long numOfFiles;
1516
long maxFileSize = 0;
1617
long minFileSize = Long.MAX_VALUE;
1718

1819
public CMDCollection(Path path) {
1920
super(path);
20-
children = new ArrayList<CurationEntity>();
21+
children = new ArrayDeque<CurationEntity>();
2122
}
2223

2324
@Override
@@ -53,7 +54,7 @@ private void aggregateWithDir(CMDCollection child) {
5354
minFileSize = child.minFileSize;
5455
}
5556

56-
public List<CurationEntity> getChildren() {
57+
public Deque<CurationEntity> getChildren() {
5758
return children;
5859
}
5960

curation-module-core/src/main/java/eu/clarin/cmdi/curation/entities/CMDProfile.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
package eu.clarin.cmdi.curation.entities;
22

3+
import java.nio.file.Path;
4+
35
import eu.clarin.cmdi.curation.processor.AbstractProcessor;
46
import eu.clarin.cmdi.curation.processor.CMDProfileProcessor;
57

@@ -13,6 +15,10 @@ public class CMDProfile extends CurationEntity {
1315

1416
private String schemaLocation;
1517
private String cmdiVersion;
18+
19+
public CMDProfile(Path path) {
20+
super(path);
21+
}
1622

1723
public CMDProfile(String schemaLocation, String cmdiVersion) {
1824
super(null);

curation-module-core/src/main/java/eu/clarin/cmdi/curation/instance_parser/InstanceParser.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ public class InstanceParser {
3131

3232
static{
3333
TransformerFactory factory = TransformerFactory.newInstance();
34-
Source xslt = new StreamSource(InstanceParser.class.getResourceAsStream("/instanceTransformer.xsl"));
34+
Source xslt = new StreamSource(InstanceParser.class.getResourceAsStream("/xslt/instanceTransformer.xsl"));
3535
try {
3636
tranformer = factory.newTransformer(xslt);
3737
} catch (TransformerConfigurationException e) {

curation-module-core/src/main/java/eu/clarin/cmdi/curation/main/Configuration.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,11 @@ public class Configuration {
3030
public static Path OUTPUT_DIRECTORY = null;
3131
public static Path CACHE_DIRECTORY = null;
3232
public static Path COLLECTION_HTML_DIRECTORY = null;
33+
public static int THREAD_POOL_SIZE=100;
3334
public static Collection<String> FACETS = null;
3435
public static int REDIRECT_FOLLOW_LIMIT;
3536
public static int TIMEOUT;
36-
private static final int TIMEOUTDEFAULT = 5000;//in ms(if config file doesnt have it)
37+
private static int TIMEOUTDEFAULT = 5000;//in ms(if config file doesnt have it)
3738

3839
public static VloConfig VLO_CONFIG;
3940
public static boolean DATABASE;
@@ -78,7 +79,7 @@ private static void readProperties(Properties config) throws IOException {
7879
} else {
7980
TIMEOUT = Integer.parseInt(timeout);
8081
}
81-
82+
THREAD_POOL_SIZE = Integer.valueOf(config.getProperty("THREAD_POOL_SIZE","100"));
8283

8384
String[] facets = config.getProperty("FACETS").split(",");
8485
FACETS = Arrays.asList(facets).stream().map(f -> f.trim()).collect(Collectors.toList());

curation-module-core/src/main/java/eu/clarin/cmdi/curation/main/CurationModule.java

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import eu.clarin.curation.linkchecker.httpLinkChecker.HTTPLinkChecker;
1111

1212
import java.io.IOException;
13+
import java.net.MalformedURLException;
1314
import java.net.URL;
1415
import java.nio.file.Files;
1516
import java.nio.file.Path;
@@ -30,9 +31,14 @@ public Report<?> processCMDProfile(URL schemaLocation) {
3031

3132
return new CMDProfile(schemaLocation.toString(), "1.x").generateReport(null);
3233
}
34+
35+
@Override
36+
public Report<?> processCMDProfile(Path path) throws MalformedURLException {
3337

34-
@Override
38+
return processCMDProfile(path.toUri().toURL());
39+
}
3540

41+
@Override
3642
public Report<?> processCMDInstance(Path path) throws IOException {
3743
if (Files.notExists(path))
3844
throw new IOException(path.toString() + " doesn't exist!");
@@ -45,10 +51,10 @@ public Report<?> processCMDInstance(Path path) throws IOException {
4551
@Override
4652
public Report<?> processCMDInstance(URL url) throws IOException {
4753
String path = FileNameEncoder.encode(url.toString()) + ".xml";
48-
Path cmdiFile = Paths.get(System.getProperty("java.io.tmpdir"), path);
49-
new HTTPLinkChecker(15000, 5, Configuration.USERAGENT).download(url.toString(), cmdiFile.toFile());
50-
long size = Files.size(cmdiFile);
51-
CMDInstance cmdInstance = new CMDInstance(cmdiFile, size);
54+
Path cmdiFilePath = Paths.get(System.getProperty("java.io.tmpdir"), path);
55+
new HTTPLinkChecker(15000, 5, Configuration.USERAGENT).download(url.toString(), cmdiFilePath.toFile());
56+
long size = Files.size(cmdiFilePath);
57+
CMDInstance cmdInstance = new CMDInstance(cmdiFilePath, size);
5258
cmdInstance.setUrl(url.toString());
5359

5460
Report<?> report = cmdInstance.generateReport(null);

curation-module-core/src/main/java/eu/clarin/cmdi/curation/main/CurationModuleInterface.java

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package eu.clarin.cmdi.curation.main;
22

33
import java.io.IOException;
4+
import java.net.MalformedURLException;
45
import java.net.URL;
56
import java.nio.file.Path;
67
import java.util.Collection;
@@ -12,9 +13,11 @@
1213
public interface CurationModuleInterface {
1314

1415
public Report processCMDProfile(String profileId);
16+
17+
public Report processCMDProfile(Path path) throws MalformedURLException, IOException;
1518

1619

17-
public Report processCMDProfile(URL schemaLocation);
20+
public Report processCMDProfile(URL schemaLocation) throws IOException;
1821

1922
/*
2023
* throws Exception if file doesn't exist or is invalid

0 commit comments

Comments
 (0)