diff --git a/.editorconfig b/.editorconfig
index 5933aeedee..422f746882 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -4,9 +4,6 @@ root = true
indent_style=space
indent_size=4
-[init-indices.sql]
-indent_size=2
-
[/pom.xml]
indent_style=tab
diff --git a/.idea/runConfigurations/Deploy__dev_.xml b/.idea/runConfigurations/Deploy__dev_.xml
new file mode 100644
index 0000000000..3e16af9e38
--- /dev/null
+++ b/.idea/runConfigurations/Deploy__dev_.xml
@@ -0,0 +1,21 @@
+<!-- IntelliJ IDEA run configuration for "Deploy (dev)"; XML content not preserved -->
\ No newline at end of file
diff --git a/.idea/runConfigurations/Deploy__staging_.xml b/.idea/runConfigurations/Deploy__staging_.xml
new file mode 100644
index 0000000000..d834542fa1
--- /dev/null
+++ b/.idea/runConfigurations/Deploy__staging_.xml
@@ -0,0 +1,21 @@
+<!-- IntelliJ IDEA run configuration for "Deploy (staging)"; XML content not preserved -->
\ No newline at end of file
diff --git a/gemma-cli/pom.xml b/gemma-cli/pom.xml
index eba8b0470c..3b55dacd79 100644
--- a/gemma-cli/pom.xml
+++ b/gemma-cli/pom.xml
@@ -3,7 +3,7 @@
<parent>
<groupId>gemma</groupId>
<artifactId>gemma</artifactId>
- <version>1.31.2</version>
+ <version>1.31.3</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>gemma-cli</artifactId>
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java
index c0c6075aa2..d25cfc1d96 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java
@@ -73,7 +73,7 @@ protected void buildOptions( Options options ) {
.desc(
"Threshold (0-1.0) for acceptance of BLAT alignments [Default = " + this.blatScoreThreshold + "]" )
.longOpt( "scoreThresh" )
- .type( Double.class )
+ .type( Number.class )
.build();
options.addOption( Option.builder( "sensitive" ).desc( "Run on more sensitive server, if available" ).build() );
@@ -107,7 +107,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
// }
if ( commandLine.hasOption( 's' ) ) {
- this.blatScoreThreshold = ( Double ) commandLine.getParsedOptionValue( 's' );
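+ // getParsedOptionValue() with Number.class yields a generic Number (e.g. Long or Double), hence the explicit conversion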
+ this.blatScoreThreshold = ( ( Number ) commandLine.getParsedOptionValue( 's' ) ).doubleValue();
}
TaxonService taxonService = this.getBean( TaxonService.class );
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java
index 7f0ba64765..2bc30977cd 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java
@@ -87,17 +87,17 @@ protected void buildOptions( Options options ) {
super.buildOptions( options );
options.addOption( Option.builder( "i" ).hasArg().argName( "value" )
- .type( Double.class )
+ .type( Number.class )
.desc( "Sequence identity threshold, default = " + ProbeMapperConfig.DEFAULT_IDENTITY_THRESHOLD )
.longOpt( "identityThreshold" ).build() );
options.addOption( Option.builder( "s" ).hasArg().argName( "value" )
- .type( Double.class )
+ .type( Number.class )
.desc( "Blat score threshold, default = " + ProbeMapperConfig.DEFAULT_SCORE_THRESHOLD )
.longOpt( "scoreThreshold" ).build() );
options.addOption( Option.builder( "o" ).hasArg().argName( "value" )
- .type( Double.class )
+ .type( Number.class )
.desc( "Minimum fraction of probe overlap with exons, default = " + ProbeMapperConfig.DEFAULT_MINIMUM_EXON_OVERLAP_FRACTION )
.longOpt( "overlapThreshold" )
.build() );
@@ -234,7 +234,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
}
if ( commandLine.hasOption( 's' ) ) {
- blatScoreThreshold = ( Double ) commandLine.getParsedOptionValue( 's' );
+ blatScoreThreshold = ( ( Number ) commandLine.getParsedOptionValue( 's' ) ).doubleValue();
if ( blatScoreThreshold < 0 || blatScoreThreshold > 1 ) {
throw new IllegalArgumentException( "BLAT score threshold must be between 0 and 1" );
}
@@ -249,14 +249,14 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
this.mirnaOnlyModeOption = commandLine.hasOption( ArrayDesignProbeMapperCli.MIRNA_ONLY_MODE_OPTION );
if ( commandLine.hasOption( 'i' ) ) {
- identityThreshold = ( Double ) commandLine.getParsedOptionValue( 'i' );
+ identityThreshold = ( ( Number ) commandLine.getParsedOptionValue( 'i' ) ).doubleValue();
if ( identityThreshold < 0 || identityThreshold > 1 ) {
throw new IllegalArgumentException( "Identity threshold must be between 0 and 1" );
}
}
if ( commandLine.hasOption( 'o' ) ) {
- overlapThreshold = ( Double ) commandLine.getParsedOptionValue( 'o' );
+ overlapThreshold = ( ( Number ) commandLine.getParsedOptionValue( 'o' ) ).doubleValue();
if ( overlapThreshold < 0 || overlapThreshold > 1 ) {
throw new IllegalArgumentException( "Overlap threshold must be between 0 and 1" );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java
index effbe261f3..e7efebba37 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java
@@ -8,7 +8,7 @@
import org.springframework.core.task.AsyncTaskExecutor;
import ubic.gemma.core.ontology.OntologyService;
import ubic.gemma.core.util.AbstractCLI;
-import ubic.gemma.model.common.description.CharacteristicValueObject;
+import ubic.gemma.model.common.description.Characteristic;
import java.util.LinkedHashMap;
import java.util.List;
@@ -90,14 +90,13 @@ protected void doWork() throws Exception {
log.info( "Ontologies warmed up, starting check..." );
- Map<String, CharacteristicValueObject> vos = ontologyService.findObsoleteTermUsage();
+ Map<Characteristic, Long> vos = ontologyService.findObsoleteTermUsage();
AbstractCLI.log.info( "Obsolete term check finished, printing ..." );
System.out.println( "Value\tValueUri\tCount" );
- for ( CharacteristicValueObject vo : vos.values() ) {
- System.out.println( vo.getValue() + "\t" + vo.getValueUri() + "\t" + vo.getNumTimesUsed() );
+ for ( Map.Entry<Characteristic, Long> vo : vos.entrySet() ) {
+ System.out.println( vo.getKey().getValue() + "\t" + vo.getKey().getValueUri() + "\t" + vo.getValue() );
}
-
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java
index 94a9e68c4c..f4a5cb0b70 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java
@@ -4,6 +4,7 @@
import org.apache.commons.cli.Options;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
import ubic.gemma.core.search.IndexerService;
import ubic.gemma.core.util.AbstractCLI;
import ubic.gemma.model.analysis.expression.ExpressionExperimentSet;
@@ -17,26 +18,26 @@
import ubic.gemma.model.genome.gene.GeneSet;
import java.io.File;
+import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
+@Component
public class IndexGemmaCLI extends AbstractCLI {
- private static final String THREADS_OPTION = "threads";
-
/**
* A list of all searchable entities this CLI supports.
*/
private static final IndexableEntity[] indexableEntities = {
- new IndexableEntity( "g", "genes", Gene.class ),
- new IndexableEntity( "e", "datasets", ExpressionExperiment.class ),
- new IndexableEntity( "a", "platforms", ArrayDesign.class ),
- new IndexableEntity( "b", "bibliographic references", BibliographicReference.class ),
- new IndexableEntity( "s", "probes", CompositeSequence.class ),
- new IndexableEntity( "q", "sequences", BioSequence.class ),
- new IndexableEntity( "x", "datasets groups", ExpressionExperimentSet.class ),
- new IndexableEntity( "y", "gene sets", GeneSet.class )
+ new IndexableEntity( "g", "genes", Gene.class, 1000 ),
+ new IndexableEntity( "e", "datasets", ExpressionExperiment.class, 1000 ),
+ new IndexableEntity( "a", "platforms", ArrayDesign.class, 100 ),
+ new IndexableEntity( "b", "bibliographic references", BibliographicReference.class, 1000 ),
+ new IndexableEntity( "s", "probes", CompositeSequence.class, 100000 ),
+ new IndexableEntity( "q", "sequences", BioSequence.class, 100000 ),
+ new IndexableEntity( "x", "datasets groups", ExpressionExperimentSet.class, 100 ),
+ new IndexableEntity( "y", "gene sets", GeneSet.class, 10 )
};
@lombok.Value
@@ -44,6 +45,7 @@ private static class IndexableEntity {
String option;
String description;
Class<? extends Identifiable> clazz;
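+ /** how often the indexer logs progress, in number of indexed entities */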
+ int loggingFrequency;
}
@Autowired
@@ -52,8 +54,7 @@ private static class IndexableEntity {
@Value("${gemma.search.dir}")
private File searchDir;
- private final Set<Class<? extends Identifiable>> classesToIndex = new HashSet<>();
- private int numThreads;
+ private final Set<IndexableEntity> classesToIndex = new HashSet<>();
@Override
public String getCommandName() {
@@ -82,21 +83,28 @@ protected void buildOptions( Options options ) {
protected void processOptions( CommandLine commandLine ) {
for ( IndexableEntity ie : indexableEntities ) {
if ( commandLine.hasOption( ie.option ) ) {
- classesToIndex.add( ie.clazz );
+ classesToIndex.add( ie );
}
}
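+ // when no entity option is given, default to indexing everything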
+ if ( classesToIndex.isEmpty() ) {
+ classesToIndex.addAll( Arrays.asList( indexableEntities ) );
+ }
+ indexerService.setNumThreads( getNumThreads() );
}
@Override
protected void doWork() throws Exception {
- if ( classesToIndex.isEmpty() ) {
- log.info( String.format( "All entities will be indexed under %s.", searchDir.getAbsolutePath() ) );
- indexerService.index( getNumThreads() );
- } else {
+ if ( classesToIndex.size() < indexableEntities.length ) {
log.info( String.format( "The following entities will be indexed under %s:\n\t%s",
searchDir.getAbsolutePath(),
- classesToIndex.stream().map( Class::getName ).collect( Collectors.joining( "\n\t" ) ) ) );
- indexerService.index( classesToIndex, getNumThreads() );
+ classesToIndex.stream().map( IndexableEntity::getClazz ).map( Class::getName ).collect( Collectors.joining( "\n\t" ) ) ) );
+ } else {
+ log.info( String.format( "All entities will be indexed under %s.", searchDir.getAbsolutePath() ) );
+ }
+ for ( IndexableEntity classToIndex : classesToIndex ) {
+ log.info( "Indexing " + classToIndex.getClazz().getName() + "..." );
+ indexerService.setLoggingFrequency( classToIndex.loggingFrequency );
+ indexerService.index( classToIndex.clazz );
}
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java
index 7f965191b3..8990039f6b 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java
@@ -291,7 +291,7 @@ protected void buildOptions( Options options ) {
.build();
options.addOption( chooseCutOption );
- options.addOption( Option.builder( "probeDegreeLim" ).hasArg().type( Integer.class ).build() );
+ options.addOption( Option.builder( "probeDegreeLim" ).hasArg().type( Number.class ).build() );
// finer-grained control is possible, of course.
Option skipQC = Option.builder( "noqc" )
@@ -427,7 +427,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
}
if ( commandLine.hasOption( "probeDegreeLim" ) ) {
- this.linkAnalysisConfig.setProbeDegreeThreshold( ( Integer ) commandLine.getParsedOptionValue( "probeDegreeLim" ) );
+ this.linkAnalysisConfig.setProbeDegreeThreshold( ( ( Number ) commandLine.getParsedOptionValue( "probeDegreeLim" ) ).intValue() );
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java
new file mode 100644
index 0000000000..9a457755c3
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java
@@ -0,0 +1,52 @@
+package ubic.gemma.core.apps;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.springframework.beans.factory.annotation.Autowired;
+import ubic.gemma.core.util.AbstractAuthenticatedCLI;
+import ubic.gemma.persistence.service.TableMaintenanceUtil;
+
+import javax.annotation.Nullable;
+
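+/**
+ * CLI for updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table, optionally truncating it first.
+ */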
+public class UpdateEE2CCli extends AbstractAuthenticatedCLI {
+
+ private static final String TRUNCATE_OPTION = "truncate";
+
+ @Autowired
+ private TableMaintenanceUtil tableMaintenanceUtil;
+
+ private boolean truncate;
+
+ @Override
+ protected void buildOptions( Options options ) {
+ options.addOption( TRUNCATE_OPTION, "truncate", false, "Truncate the table before updating it" );
+ }
+
+ @Override
+ protected void processOptions( CommandLine commandLine ) throws ParseException {
+ truncate = commandLine.hasOption( TRUNCATE_OPTION );
+ }
+
+ @Override
+ protected void doWork() throws Exception {
+ tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( truncate );
+ }
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "updateEe2c";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Update the EXPRESSION_EXPERIMENT2CHARACTERISTIC table";
+ }
+
+ @Override
+ public GemmaCLI.CommandGroup getCommandGroup() {
+ return GemmaCLI.CommandGroup.EXPERIMENT;
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java
index 2dff2278ea..ab56b5e2da 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java
@@ -273,7 +273,7 @@ protected void addDateOption( Options options ) {
protected void addThreadsOption( Options options ) {
options.addOption( Option.builder( THREADS_OPTION ).argName( "numThreads" ).hasArg()
.desc( "Number of threads to use for batch processing." )
- .type( Integer.class )
+ .type( Number.class )
.build() );
}
@@ -349,7 +349,7 @@ protected void processStandardOptions( CommandLine commandLine ) throws ParseExc
this.autoSeek = commandLine.hasOption( AbstractCLI.AUTO_OPTION_NAME );
if ( commandLine.hasOption( THREADS_OPTION ) ) {
- this.numThreads = ( Integer ) commandLine.getParsedOptionValue( THREADS_OPTION );
+ this.numThreads = ( ( Number ) commandLine.getParsedOptionValue( THREADS_OPTION ) ).intValue();
if ( this.numThreads < 1 ) {
throw new IllegalArgumentException( "Number of threads must be at least 1." );
}
diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml
index 31a8933710..d4e5be3677 100644
--- a/gemma-core/pom.xml
+++ b/gemma-core/pom.xml
@@ -3,7 +3,7 @@
<parent>
<groupId>gemma</groupId>
<artifactId>gemma</artifactId>
- <version>1.31.2</version>
+ <version>1.31.3</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>gemma-core</artifactId>
@@ -216,8 +216,9 @@
${project.build.directory}/schema/gemma/gsec/sql/gsec-acl-ddl.sql
${project.build.directory}/schema/gemma/gsec/sql/init-acl-indices.sql
${project.basedir}/src/main/resources/sql/init-acls.sql
- ${project.basedir}/src/main/resources/sql/init-indices.sql
- ${project.basedir}/src/main/resources/sql/init-entities.sql
+ ${project.basedir}/src/main/resources/sql/mysql/init-entities.sql
+ ${project.basedir}/src/main/resources/sql/init-data.sql
${skipIntegrationTests}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java
index 618a8dbe83..2f06737a19 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java
@@ -133,8 +133,8 @@ private static Collection<BatchConfound> factorBatchConfoundTest( BioAssaySet ee
assert numBioMaterials > 0 : "No biomaterials for " + ef;
double p = Double.NaN;
- double chiSquare;
- int df;
+ double chiSquare = Double.NaN;
+ int df = 0;
int numBatches = batchFactor.getFactorValues().size();
if ( ExperimentalDesignUtils.isContinuous( ef ) ) {
@@ -238,8 +238,14 @@ private static Collection<BatchConfound> factorBatchConfoundTest( BioAssaySet ee
continue; // to the next factor
}
+ /*
+ * The chi-square test is underpowered for small sample sizes (e.g. 3 + 3), so perfect confounds can go
+ * undetected. We therefore handle two special cases before relying on the chi-square test:
+ * 1) a 2x2 contingency table, treated as an odds-ratio computation, and 2) a small number of batches and
+ * observations, where we scan for rows and columns with a single non-zero entry.
+ */
ChiSquareTest cst = new ChiSquareTest();
-
+ // compute the chi-square statistic up front; we fall back on it when the special cases below find nothing
try {
chiSquare = cst.chiSquare( finalCounts );
} catch ( IllegalArgumentException e ) {
@@ -248,10 +254,49 @@ private static Collection<BatchConfound> factorBatchConfoundTest( BioAssaySet ee
chiSquare = Double.NaN;
}
+ if ( finalCounts.length == 2 && finalCounts[0].length == 2 ) { // treat as odds ratio computation
+ double numerator = ( double ) finalCounts[0][0] * finalCounts[1][1];
+ double denominator = ( double ) finalCounts[0][1] * finalCounts[1][0];
+
+ // if either value is zero, we have a perfect confound
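+ // e.g. counts of [[3,0],[0,3]]: the off-diagonal product is zero, so the odds ratio is infinite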
+ if ( numerator == 0 || denominator == 0 ) {
+ chiSquare = Double.POSITIVE_INFINITY; // effectively we shift to fisher's exact test here.
+ }
+
+ } else if ( numBioMaterials <= 10 && finalCounts.length <= 4 ) { // number of batches and number of samples is small
+
+ // look for pairs of rows and columns where there is only one non-zero value in each, which would be a confound.
+ for ( int r = 0; r < finalCounts.length; r++ ) {
+ int numNonzero = 0;
+ int nonZeroIndex = -1;
+ for ( int c = 0; c < finalCounts[0].length; c++ ) {
+ if ( finalCounts[r][c] != 0 ) {
+ numNonzero++;
+ nonZeroIndex = c;
+ }
+ }
+ // inspect the column
+ if ( numNonzero == 1 ) {
+ int numNonzeroColumnVals = 0;
+ for ( int r2 = 0; r2 < finalCounts.length; r2++ ) {
+ if ( finalCounts[r2][nonZeroIndex] != 0 ) {
+ numNonzeroColumnVals++;
+ }
+ }
+ if ( numNonzeroColumnVals == 1 ) {
+ chiSquare = Double.POSITIVE_INFINITY;
+ break;
+ }
+ }
+ }
+ }
+
df = ( finalCounts.length - 1 ) * ( finalCounts[0].length - 1 );
ChiSquaredDistribution distribution = new ChiSquaredDistribution( df );
- if ( !Double.isNaN( chiSquare ) ) {
+ if ( chiSquare == Double.POSITIVE_INFINITY ) {
+ p = 0.0;
+ } else if ( !Double.isNaN( chiSquare ) ) {
p = 1.0 - distribution.cumulativeProbability( chiSquare );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ArrayDesignReportServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ArrayDesignReportServiceImpl.java
index 61b3d626e8..9e08bf31ef 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ArrayDesignReportServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ArrayDesignReportServiceImpl.java
@@ -44,7 +44,7 @@
/**
* @author jsantos
*/
-@Component
+@Component("arrayDesignReportService")
public class ArrayDesignReportServiceImpl implements ArrayDesignReportService {
private final static String HOME_DIR = Settings.getString( "gemma.appdata.home" );
private final static Log log = LogFactory.getLog( ArrayDesignReportServiceImpl.class );
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java
index 07cf730e2c..090a155674 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java
@@ -57,7 +57,7 @@
* @author paul
* @author klc
*/
-@Service
+@Service("expressionExperimentReportService")
public class ExpressionExperimentReportServiceImpl implements ExpressionExperimentReportService, InitializingBean {
private static final String NOTE_UPDATED_CONFOUND = "Updated batch confound";
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/WhatsNewServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/WhatsNewServiceImpl.java
index 75f49d531c..fa4c1dcdc6 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/WhatsNewServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/WhatsNewServiceImpl.java
@@ -52,7 +52,7 @@
*
* @author pavlidis
*/
-@Component
+@Component("whatsNewService")
@SuppressWarnings({ "unused", "WeakerAccess" }) // Possible external use
public class WhatsNewServiceImpl implements WhatsNewService {
diff --git a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java
index 36191a53a8..06239e13dc 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java
@@ -18,19 +18,16 @@
*/
package ubic.gemma.core.externalDb;
+import com.zaxxer.hikari.HikariDataSource;
import lombok.Getter;
-import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.jdbc.core.JdbcTemplate;
-import org.springframework.jdbc.datasource.SimpleDriverDataSource;
import ubic.gemma.model.common.description.DatabaseType;
import ubic.gemma.model.common.description.ExternalDatabase;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.persistence.util.Settings;
-import java.sql.Driver;
-
/**
* Perform useful queries against GoldenPath (UCSC) databases.
*
@@ -57,40 +54,30 @@ public GoldenPath( Taxon taxon ) {
private static JdbcTemplate createJdbcTemplateFromConfig( Taxon taxon ) {
String host;
- int port;
- String user;
- String password;
String databaseName = getDbNameForTaxon( taxon );
- host = Settings.getString( "gemma.goldenpath.db.host" );
- port = Settings.getInt( "gemma.goldenpath.db.port", 3306 );
-
- user = Settings.getString( "gemma.goldenpath.db.user" );
- password = Settings.getString( "gemma.goldenpath.db.password" );
-
- SimpleDriverDataSource dataSource = new SimpleDriverDataSource();
- String url = "jdbc:mysql://" + host + ":" + port + "/" + databaseName + "?relaxAutoCommit=true&useSSL=false";
+ HikariDataSource dataSource = new HikariDataSource();
+ dataSource.setPoolName( "goldenpath" );
+ String driverClassName = Settings.getString( "gemma.goldenpath.db.driver" );
+ String url = Settings.getString( "gemma.goldenpath.db.url" );
+ String user = Settings.getString( "gemma.goldenpath.db.user" );
+ String password = Settings.getString( "gemma.goldenpath.db.password" );
GoldenPath.log.info( "Connecting to " + databaseName );
GoldenPath.log.debug( "Connecting to Golden Path : " + url + " as " + user );
- String driver = Settings.getString( "gemma.goldenpath.db.driver" );
- if ( StringUtils.isBlank( driver ) ) {
- driver = Settings.getString( "gemma.db.driver" );
- GoldenPath.log.warn( "No DB driver configured for GoldenPath, falling back on gemma.db.driver=" + driver );
- }
- try {
- //noinspection unchecked
- dataSource.setDriverClass( ( Class<? extends Driver> ) Class.forName( driver ) );
- } catch ( ClassNotFoundException e ) {
- throw new RuntimeException( e );
- }
- dataSource.setUrl( url );
+ dataSource.setDriverClassName( driverClassName );
+ dataSource.setJdbcUrl( url );
dataSource.setUsername( user );
dataSource.setPassword( password );
+ dataSource.setMaximumPoolSize( Settings.getInt( "gemma.goldenpath.db.maximumPoolSize" ) );
+ dataSource.addDataSourceProperty( "relaxAutoCommit", "true" );
JdbcTemplate jdbcTemplate = new JdbcTemplate( dataSource );
jdbcTemplate.setFetchSize( 50 );
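+ // the configured JDBC URL is shared across taxa, so switch to the taxon-specific GoldenPath database explicitly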
+ jdbcTemplate.execute( "use " + databaseName );
+
return jdbcTemplate;
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java
index 2d5eef3706..a361b3ba9a 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java
@@ -216,7 +216,7 @@ public Collection<SearchResultDisplayObject> searchGenesAndGeneGroups( String qu
// convert result object to a value object
List<SearchResult<DatabaseBackedGeneSetValueObject>> dbsgvo = taxonCheckedSets.stream()
.filter( Objects::nonNull )
- .map( sr -> SearchResult.from( sr, geneSetValueObjectHelper.convertToValueObject( sr.getResultObject() ) ) )
+ .map( sr -> sr.withResultObject( geneSetValueObjectHelper.convertToValueObject( sr.getResultObject() ) ) )
.collect( Collectors.toList() );
geneSets = SearchResultDisplayObject.convertSearchResults2SearchResultDisplayObjects( dbsgvo );
diff --git a/gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java b/gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java
new file mode 100644
index 0000000000..1b9778e290
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java
@@ -0,0 +1,51 @@
+package ubic.gemma.core.metrics.binder;
+
+import io.micrometer.core.instrument.Gauge;
+import io.micrometer.core.instrument.MeterRegistry;
+import io.micrometer.core.instrument.binder.MeterBinder;
+import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
+
+import javax.annotation.Nullable;
+import javax.annotation.ParametersAreNonnullByDefault;
+
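+/**
+ * Binds gauges for a {@link ThreadPoolTaskExecutor} (core/max/current pool size, active threads, queue size)
+ * to a Micrometer {@link MeterRegistry}.
+ */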
+@ParametersAreNonnullByDefault
+public class ThreadPoolTaskExecutorMetrics implements MeterBinder {
+
+ private final ThreadPoolTaskExecutor executor;
+
+ @Nullable
+ private String poolName;
+
+ public ThreadPoolTaskExecutorMetrics( ThreadPoolTaskExecutor executor ) {
+ this.executor = executor;
+ }
+
+ @Override
+ public void bindTo( MeterRegistry registry ) {
+ String poolName = this.poolName != null ? this.poolName : executor.getThreadNamePrefix();
+ Gauge.builder( "threadPool.corePoolSize", executor, ThreadPoolTaskExecutor::getCorePoolSize )
+ .description( "Core pool size" )
+ .tags( "pool", poolName )
+ .register( registry );
+ Gauge.builder( "threadPool.maxPoolSize", executor, e -> e.getMaxPoolSize() == Integer.MAX_VALUE ? Double.POSITIVE_INFINITY : e.getMaxPoolSize() )
+ .description( "Maximum pool size" )
+ .tags( "pool", poolName )
+ .register( registry );
+ Gauge.builder( "threadPool.poolSize", executor, ThreadPoolTaskExecutor::getPoolSize )
+ .description( "Pool size" )
+ .tags( "pool", poolName )
+ .register( registry );
+ Gauge.builder( "threadPool.activeCount", executor, ThreadPoolTaskExecutor::getActiveCount )
+ .description( "Number of active threads" )
+ .tags( "pool", poolName )
+ .register( registry );
+ Gauge.builder( "threadPool.queueSize", executor, e -> e.getThreadPoolExecutor().getQueue().size() )
+ .description( "Queue size" )
+ .tags( "pool", poolName )
+ .register( registry );
+ }
+
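+ /**
+ * Override the pool name used for the {@code pool} tag; defaults to the executor's thread name prefix.
+ */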
+ public void setPoolName( String poolName ) {
+ this.poolName = poolName;
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java
index 1dd2be213c..c33793e7be 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java
@@ -16,11 +16,22 @@ protected AbstractOntologyResourceSimple( @Nullable String uri, String label ) {
this.label = label;
}
+ @Override
+ public String getLocalName() {
+ return uri;
+ }
+
@Override
public String getLabel() {
return label;
}
+ @Nullable
+ @Override
+ public String getComment() {
+ return null;
+ }
+
@Override
@Nullable
public String getUri() {
diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java
index 6932d204c9..fd27df8d8f 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java
@@ -2,10 +2,13 @@
import lombok.EqualsAndHashCode;
import lombok.Value;
-import org.apache.commons.math3.util.Combinations;
+import lombok.extern.apachecommons.CommonsLog;
+import org.apache.commons.lang3.time.StopWatch;
import org.springframework.cache.Cache;
+import org.springframework.util.Assert;
import ubic.basecode.ontology.model.OntologyTerm;
import ubic.basecode.ontology.providers.OntologyService;
+import ubic.basecode.ontology.search.OntologySearchException;
import ubic.gemma.persistence.util.CacheUtils;
import javax.annotation.Nullable;
@@ -14,19 +17,49 @@
/**
* High-level cache abstraction for retrieving parents and children of a set of terms.
*
- * The main approach here for caching is to lookup all the possible {@code k-1} subsets (then {@code k - 2},
- * {@code k - 3}, ...) of a given query and only retrieve the difference from the {@link OntologyService}.
+ * The main approach here for caching is to enumerate cache keys to find subsets of a given query and only retrieve the
+ * difference from the {@link OntologyService}.
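+ * For example, if the parents of {A, B, C} are requested and an entry for {A, B} is already cached, only the
+ * parents of {C} are fetched and the two results are merged.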
* @author poirigui
*/
+@CommonsLog
class OntologyCache {
- private final Cache parentsCache, childrenCache;
+ private final Cache searchCache, parentsCache, childrenCache;
- OntologyCache( Cache parentsCache, Cache childrenCache ) {
+ private int minSubsetSize = 1;
+
+ OntologyCache( Cache searchCache, Cache parentsCache, Cache childrenCache ) {
+ this.searchCache = searchCache;
this.parentsCache = parentsCache;
this.childrenCache = childrenCache;
}
+ /**
+ * Minimum size of subsets to consider when enumerating cache keys.
+ */
+ void setMinSubsetSize( int minSubsetSize ) {
+ Assert.isTrue( minSubsetSize > 0 );
+ this.minSubsetSize = minSubsetSize;
+ }
+
+ public Collection<OntologyTerm> findTerm( OntologyService ontology, String query ) throws OntologySearchException {
+ SearchCacheKey key = new SearchCacheKey( ontology, query );
+
+ try ( CacheUtils.Lock ignored = CacheUtils.acquireReadLock( searchCache, key ) ) {
+ Cache.ValueWrapper value = searchCache.get( key );
+ if ( value != null ) {
+ //noinspection unchecked
+ return ( Collection<OntologyTerm> ) value.get();
+ }
+ }
+
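+ // not cached: query the ontology under a write lock to serialize concurrent searches for the same query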
+ try ( CacheUtils.Lock ignored = CacheUtils.acquireWriteLock( searchCache, key ) ) {
+ Collection<OntologyTerm> results = ontology.findTerm( query );
+ searchCache.put( key, results );
+ return results;
+ }
+ }
+
/**
* Obtain the parents of a given set of terms.
*/
@@ -41,6 +74,14 @@ Set<OntologyTerm> getChildren( OntologyService os, Collection<OntologyTerm> term
return getParentsOrChildren( os, terms, direct, includeAdditionalProperties, childrenCache, false );
}
+ /**
+ * Clear the search cache of all entries related to a given ontology service.
+ */
+ public void clearSearchCacheByOntology( OntologyService serv ) {
+ CacheUtils.evictIf( searchCache, key -> ( ( SearchCacheKey ) key ).getOntologyService().equals( serv ) );
+ }
+
/**
* Clear the cache for all entries related to a given ontology service.
*/
@@ -53,140 +94,102 @@ private Set<OntologyTerm> getParentsOrChildren( OntologyService os, Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties, Cache cache, boolean ancestors ) {
+ StopWatch timer = StopWatch.createStarted();
Set<OntologyTerm> termsSet = new HashSet<>( terms );
- Object key = new ParentsOrChildrenCacheKey( os, termsSet, direct, includeAdditionalProperties );
- Cache.ValueWrapper value = cache.get( key );
- if ( value != null ) {
- //noinspection unchecked
- return ( Set<OntologyTerm> ) value.get();
- } else {
- if ( termsSet.size() > 1 ) {
+ ParentsOrChildrenCacheKey key = new ParentsOrChildrenCacheKey( os, termsSet, direct, includeAdditionalProperties );
+
+ // there might be a thread computing this cache entry
+ long initialLockAcquisitionMs = timer.getTime();
+ try ( CacheUtils.Lock ignored = CacheUtils.acquireReadLock( cache, key ) ) {
+ initialLockAcquisitionMs = timer.getTime() - initialLockAcquisitionMs;
+ Cache.ValueWrapper value = cache.get( key );
+ if ( value != null ) {
//noinspection unchecked
- HashSet<ParentsOrChildrenCacheKey> keys = new HashSet<>( ( Collection<ParentsOrChildrenCacheKey> ) CacheUtils.getKeys( cache ) );
-
- // try looking for k-1 or k-2 subsets
- ParentsOrChildrenCacheKey keyForSubset = lookupMaximalSubsetByCombination( keys, os, termsSet, direct, includeAdditionalProperties );
-
- // try enumerating keys (initially fast, but gets slower as the cache grows)
- if ( keyForSubset == null ) {
- keyForSubset = lookupMaximalSubsetByEnumeratingKeys( keys, os, termsSet, direct, includeAdditionalProperties );
- }
-
- if ( keyForSubset != null ) {
- Cache.ValueWrapper valueForSubset = cache.get( keyForSubset );
- if ( valueForSubset != null ) {
- //noinspection unchecked
- Set<OntologyTerm> resultsForSubset = ( Set<OntologyTerm> ) valueForSubset.get();
- // only query the difference
- Set<OntologyTerm> remainingTerms = new HashSet<>( termsSet );
- remainingTerms.removeAll( keyForSubset.terms );
- Set<OntologyTerm> remainingResults = getParentsOrChildren( os, remainingTerms, direct, includeAdditionalProperties, cache, ancestors );
- // recombine the results
- Set<OntologyTerm> results = new HashSet<>( resultsForSubset );
- results.addAll( remainingResults );
- cache.put( key, results );
- return results;
- }
- }
- }
-
- // no subsets are of any use, so directly query
- try ( CacheUtils.Lock ignored = CacheUtils.acquireWriteLock( cache, key ) ) {
- // check if the entry have been computed by another thread
- value = cache.get( key );
- if ( value != null ) {
- //noinspection unchecked
- return ( Set<OntologyTerm> ) value.get();
- }
- Set<OntologyTerm> newVal = ancestors ?
- os.getParents( termsSet, direct, includeAdditionalProperties ) :
- os.getChildren( termsSet, direct, includeAdditionalProperties );
- cache.put( key, newVal );
- return newVal;
+ return ( Set<OntologyTerm> ) value.get();
}
}
- }
-
- /**
- * A HashSet implementation with a cheap hashCode() operation.
- */
- private static class IncrementalHashSet<T> extends HashSet<T> {
-
- private int hashCode = 0;
-
- public IncrementalHashSet( Set<T> terms ) {
- super( terms );
- }
- @Override
- public boolean add( T o ) {
- if ( !super.add( o ) ) {
- hashCode += o.hashCode();
- return true;
+ long lookupSubsetMs = 0;
+ ParentsOrChildrenCacheKey keyForSubset;
+ // enough terms to make it worth looking for subsets...
+ if ( termsSet.size() >= minSubsetSize + 1 ) {
+ lookupSubsetMs = timer.getTime();
+ keyForSubset = lookupMaximalSubsetByEnumeratingKeys( cache, os, termsSet, direct, includeAdditionalProperties );
+ lookupSubsetMs = timer.getTime() - lookupSubsetMs;
+ if ( lookupSubsetMs > 100 ) {
+ log.warn( String.format( "Enumerating cache keys for finding a maximal subset for %s of %s took %d ms and %s",
+ ancestors ? "parents" : "children", key, lookupSubsetMs, keyForSubset != null ? "succeeded with " + keyForSubset + " terms" : "failed" ) );
}
- return false;
+ } else {
+ // we used to enumerate all possible k-1 and k-2 subsets, but that is too slow compared to enumerating
+ // cache keys; other strategies can be implemented here if necessary
+ keyForSubset = null;
}
- @Override
- public boolean remove( Object o ) {
- if ( !super.remove( o ) ) {
- hashCode -= o.hashCode();
- return true;
+ if ( keyForSubset != null ) {
+ Cache.ValueWrapper valueForSubset = cache.get( keyForSubset );
+ if ( valueForSubset != null ) {
+ //noinspection unchecked
+ Set resultsForSubset = ( Set ) valueForSubset.get();
+ // only query the difference
+ Set remainingTerms = new HashSet<>( termsSet );
+ remainingTerms.removeAll( keyForSubset.terms );
+ Set remainingResults = getParentsOrChildren( os, remainingTerms, direct, includeAdditionalProperties, cache, ancestors );
+ // recombine the results
+ Set results = new HashSet<>( resultsForSubset );
+ results.addAll( remainingResults );
+ cache.put( key, results );
+ return results;
+ } else {
+ log.warn( "Missing expected key from the " + ( ancestors ? "parents" : "children" ) + " cache: " + keyForSubset );
}
- return false;
- }
-
- @Override
- public int hashCode() {
- return hashCode;
}
- }
- /**
- * Check if a k-1 (or k-2) subset of a given set of terms is in the given cache and query the difference.
- *
- * Because the number of subset is exponential in the number of terms, we only try subsets of size 1 and 2 if
- * {@code n < 100}.
- */
- @Nullable
- private ParentsOrChildrenCacheKey lookupMaximalSubsetByCombination( Set<ParentsOrChildrenCacheKey> keys, OntologyService os, Set<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties ) {
- // we will be generating subsets from this
- List<OntologyTerm> orderedTerms = new ArrayList<>( terms );
- // we will be mutating this
- Set<OntologyTerm> termsForSubset = new IncrementalHashSet<>( terms );
- // successively try removing k-subsets (k = 1 up to 3); it grows exponentially so careful here!
- int n = orderedTerms.size();
- // n = 100 has ~5000 2-combinations
- int maxN = n < 100 ? 2 : 1;
- // if n = k, there's only one subset, and it's the same case as if no subsets were found
- for ( int k = 1; k <= Math.min( n - 1, maxN ); k++ ) {
- for ( int[] is : new Combinations( n, k ) ) {
- for ( int i : is ) {
- termsForSubset.remove( orderedTerms.get( i ) );
- }
- // note: ParentsOrChildrenCacheKey is immutable so that the hashCode can be efficiently computed
- ParentsOrChildrenCacheKey keyForSubset = new ParentsOrChildrenCacheKey( os, termsForSubset, direct, includeAdditionalProperties );
- if ( keys.contains( keyForSubset ) ) {
- return keyForSubset;
- }
- for ( int i : is ) {
- termsForSubset.add( orderedTerms.get( i ) );
- }
+ long acquireMs = timer.getTime();
+ long computingMs = 0;
+ try ( CacheUtils.Lock ignored = CacheUtils.acquireWriteLock( cache, key ) ) {
+ acquireMs = timer.getTime() - acquireMs;
+ // lookup the cache in case another thread computed the result while we were enumerating subsets
+ Cache.ValueWrapper value = cache.get( key );
+ if ( value != null ) {
+ //noinspection unchecked
+ return ( Set ) value.get();
+ }
+ computingMs = timer.getTime();
+ // no subset found in the cache, just compute it from scratch
+ Set newVal = ancestors ?
+ os.getParents( termsSet, direct, includeAdditionalProperties ) :
+ os.getChildren( termsSet, direct, includeAdditionalProperties );
+ computingMs = timer.getTime() - computingMs;
+ // ignore empty newVal, it might just be that the ontology is not initialized yet
+ if ( !newVal.isEmpty() && computingMs < lookupSubsetMs ) {
+ log.warn( String.format( "Computing %d %s terms for %s took less time than looking up subsets, increasing the minSubsetSize might be beneficial",
+ newVal.size(),
+ ancestors ? "parents" : "children",
+ key ) );
+ }
+ cache.put( key, newVal );
+ return newVal;
+ } finally {
+ if ( timer.getTime() > 500 ) {
+ log.warn( String.format( "Retrieving %s for %s took %d ms (acquiring locks: %d ms, enumerating subsets: %d ms, computing: %d ms)",
+ ancestors ? "parents" : "children", key, timer.getTime(), initialLockAcquisitionMs + acquireMs, lookupSubsetMs, computingMs ) );
}
}
- return null;
}
/**
* Enumerate the cache's keys to find the maximal subset.
- *
- * This is less efficient than {@link #lookupMaximalSubsetByCombination(Set, OntologyService, Set, boolean, boolean)}
- * because we to verify if a subset exist for each key of the cache.
*/
@Nullable
- private ParentsOrChildrenCacheKey lookupMaximalSubsetByEnumeratingKeys( Collection<ParentsOrChildrenCacheKey> keys, OntologyService os, Set<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties ) {
- return keys.stream()
- .filter( k -> k.ontologyService.equals( os ) && k.direct == direct && k.includeAdditionalProperties == includeAdditionalProperties && terms.containsAll( k.terms ) )
+ private ParentsOrChildrenCacheKey lookupMaximalSubsetByEnumeratingKeys( Cache cache, OntologyService os, Set<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties ) {
+ return CacheUtils.getKeys( cache ).stream()
+ .map( o -> ( ParentsOrChildrenCacheKey ) o )
+ .filter( k -> k.direct == direct && k.includeAdditionalProperties == includeAdditionalProperties && k.ontologyService.equals( os ) )
+ // ignore empty subsets, those will cause an infinite loop
+ // skip sets which are larger or equal in size, those cannot be subsets
+ .filter( k -> k.terms.size() >= minSubsetSize && k.terms.size() < terms.size() && terms.containsAll( k.terms ) )
.max( Comparator.comparingInt( k1 -> k1.terms.size() ) )
.orElse( null );
}
@@ -194,9 +197,22 @@ private ParentsOrChildrenCacheKey lookupMaximalSubsetByEnumeratingKeys( Collecti
@Value
@EqualsAndHashCode(cacheStrategy = EqualsAndHashCode.CacheStrategy.LAZY)
private static class ParentsOrChildrenCacheKey {
- ubic.basecode.ontology.providers.OntologyService ontologyService;
+ OntologyService ontologyService;
Set<OntologyTerm> terms;
boolean direct;
boolean includeAdditionalProperties;
+
+ @Override
+ public String toString() {
+ return String.format( "%d terms from %s [%s] [%s]", terms.size(), ontologyService,
+ direct ? "direct" : "all",
+ includeAdditionalProperties ? "subClassOf and " + ontologyService.getAdditionalPropertyUris().size() + " additional properties" : "only subClassOf" );
+ }
+ }
+
+ @Value
+ private static class SearchCacheKey {
+ OntologyService ontologyService;
+ String query;
}
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java
index dabd33995f..254cc995e5 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java
@@ -18,10 +18,8 @@
import ubic.basecode.ontology.model.OntologyTerm;
import ubic.gemma.core.search.SearchException;
import ubic.gemma.model.common.description.Characteristic;
-import ubic.gemma.model.expression.biomaterial.BioMaterial;
-import ubic.gemma.model.expression.experiment.ExpressionExperiment;
-import ubic.gemma.model.genome.Taxon;
import ubic.gemma.model.common.description.CharacteristicValueObject;
+import ubic.gemma.model.genome.Taxon;
import javax.annotation.Nullable;
import java.util.Collection;
@@ -35,17 +33,13 @@
public interface OntologyService {
/**
- *
* Locates usages of obsolete terms in Characteristics, ignoring Gene Ontology annotations. Requires the ontologies are loaded into memory.
- *
*
- * Will also find terms that are no longer in an ontology we use.
- *
- *
+ * Will also find terms that are no longer in an ontology we use.
* @return map of value URI to a representative characteristic using the term. The latter will contain a count
- * of how many ocurrences there were.
+ * of how many occurrences there were.
*/
- Map<String, CharacteristicValueObject> findObsoleteTermUsage();
+ Map<Characteristic, Long> findObsoleteTermUsage();
/**
* Using the ontology and values in the database, for a search searchQuery given by the client give an ordered list
@@ -55,6 +49,7 @@ public interface OntologyService {
* @param useNeuroCartaOntology use neurocarta ontology
* @return characteristic vos
*/
+ @Deprecated
Collection<CharacteristicValueObject> findExperimentsCharacteristicTags( String searchQuery,
boolean useNeuroCartaOntology ) throws SearchException;
@@ -63,10 +58,10 @@ Collection<CharacteristicValueObject> findExperimentsCharacteristicTags( String
* looks like a URI, it just retrieves the term.
* For other queries, this a lucene backed search, is inexact and for general terms can return a lot of results.
*
- * @param search search
+ * @param query search query
* @return returns a collection of ontologyTerm's
*/
- Collection<OntologyTerm> findTerms( String search ) throws SearchException;
+ Collection<OntologyTerm> findTerms( String query ) throws SearchException;
/**
* Given a search string will first look through the characteristic database for any entries that have a match. If a
@@ -82,15 +77,14 @@ Collection findExperimentsCharacteristicTags( String
Collection<CharacteristicValueObject> findTermsInexact( String givenQueryString, @Nullable Taxon taxon ) throws SearchException;
/**
- * @return terms which are allowed for use in the Category of a Characteristic
+ * Obtain terms which are allowed for use in the category of a {@link ubic.gemma.model.common.description.Characteristic}.
*/
- Collection<OntologyTerm> getCategoryTerms();
+ Set<OntologyTerm> getCategoryTerms();
/**
- *
- * @return terms allowed for the predicate (relationship) in a Characteristic
+ * Obtain terms allowed for the predicate (relationship) in a {@link ubic.gemma.model.expression.experiment.Statement}.
*/
- Collection<OntologyTerm> getRelationTerms();
+ Set<OntologyTerm> getRelationTerms();
/**
* Obtain the parents of a collection of terms.
@@ -105,14 +99,13 @@ Collection findExperimentsCharacteristicTags( String
Set<OntologyTerm> getChildren( Collection<OntologyTerm> matchingTerms, boolean direct, boolean includeAdditionalProperties );
/**
- * @param uri uri
- * @return the definition of the associated OntologyTerm. This requires that the ontology be loaded.
+ * Obtain a definition for the given URI.
*/
+ @Nullable
String getDefinition( String uri );
/**
- * @param uri uri
- * @return the OntologyTerm for the specified URI.
+ * Obtain a term for the given URI.
*/
@Nullable
OntologyTerm getTerm( String uri );
@@ -122,8 +115,6 @@ Collection findExperimentsCharacteristicTags( String
*/
Set<OntologyTerm> getTerms( Collection<String> uris );
- boolean isObsolete( String uri );
-
/**
* Recreate the search indices, for ontologies that are loaded.
*/
diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java
index 953f5ab519..3bcaadbead 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java
@@ -18,6 +18,7 @@
*/
package ubic.gemma.core.ontology;
+import org.apache.commons.lang3.RandomUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.commons.logging.Log;
@@ -30,12 +31,11 @@
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import org.springframework.core.task.AsyncTaskExecutor;
+import org.springframework.core.task.SimpleAsyncTaskExecutor;
import org.springframework.core.task.TaskExecutor;
import org.springframework.stereotype.Service;
-import ubic.basecode.ontology.model.AnnotationProperty;
-import ubic.basecode.ontology.model.OntologyProperty;
-import ubic.basecode.ontology.model.OntologyTerm;
-import ubic.basecode.ontology.model.OntologyTermSimple;
+import org.springframework.util.Assert;
+import ubic.basecode.ontology.model.*;
import ubic.basecode.ontology.providers.ExperimentalFactorOntologyService;
import ubic.basecode.ontology.providers.ObiService;
import ubic.basecode.ontology.search.OntologySearch;
@@ -63,6 +63,7 @@
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
@@ -77,8 +78,8 @@
public class OntologyServiceImpl implements OntologyService, InitializingBean {
private static final Log log = LogFactory.getLog( OntologyServiceImpl.class.getName() );
-
private static final String
+ SEARCH_CACHE_NAME = "OntologyService.search",
PARENTS_CACHE_NAME = "OntologyService.parents",
CHILDREN_CACHE_NAME = "OntologyService.children";
@@ -91,7 +92,7 @@ public class OntologyServiceImpl implements OntologyService, InitializingBean {
@Autowired
private GeneService geneService;
@Autowired
- private AsyncTaskExecutor taskExecutor;
+ private AsyncTaskExecutor taskExecutor = new SimpleAsyncTaskExecutor();
@Autowired
private ExperimentalFactorOntologyService experimentalFactorOntologyService;
@@ -121,7 +122,7 @@ public class OntologyServiceImpl implements OntologyService, InitializingBean {
@Override
public void afterPropertiesSet() throws Exception {
- ontologyCache = new OntologyCache( cacheManager.getCache( PARENTS_CACHE_NAME ), cacheManager.getCache( CHILDREN_CACHE_NAME ) );
+ ontologyCache = new OntologyCache( cacheManager.getCache( SEARCH_CACHE_NAME ), cacheManager.getCache( PARENTS_CACHE_NAME ), cacheManager.getCache( CHILDREN_CACHE_NAME ) );
if ( ontologyServiceFactories != null && autoLoadOntologies ) {
List<ubic.basecode.ontology.providers.OntologyService> enabledOntologyServices = ontologyServiceFactories.stream()
.map( factory -> {
@@ -259,14 +260,7 @@ public Collection<OntologyTerm> findTerms( String search ) throws BaseCodeOntolo
* URI input: just retrieve the term.
*/
if ( search.startsWith( "http://" ) ) {
- return combineInThreads( ontology -> {
- OntologyTerm found = ontology.getTerm( search );
- if ( found != null ) {
- return Collections.singleton( found );
- } else {
- return Collections.emptySet();
- }
- } );
+ return Collections.singleton( findFirst( ontology -> ontology.getTerm( search ), "terms matching " + search ) );
}
Collection<OntologyTerm> results = new HashSet<>();
@@ -280,17 +274,19 @@ public Collection findTerms( String search ) throws BaseCodeOntolo
return results;
}
- results = searchInThreads( ontology -> ontology.findTerm( query ) );
+ results = searchInThreads( ontology -> ontologyCache.findTerm( ontology, query ), query );
if ( geneOntologyService.isOntologyLoaded() ) {
try {
- results.addAll( geneOntologyService.findTerm( search ) );
+ results.addAll( ontologyCache.findTerm( geneOntologyService, search ) );
} catch ( OntologySearchException e ) {
throw new BaseCodeOntologySearchException( e );
}
}
- return results;
+ return results.stream()
+ .sorted( Comparator.comparing( OntologyTerm::getScore, Comparator.nullsLast( Comparator.reverseOrder() ) ) )
+ .collect( Collectors.toCollection( LinkedHashSet::new ) );
}
@Override
@@ -304,7 +300,7 @@ public Collection<CharacteristicValueObject> findTermsInexact( String givenQuery
String queryString = OntologySearch.stripInvalidCharacters( givenQueryString );
if ( StringUtils.isBlank( queryString ) ) {
OntologyServiceImpl.log.warn( "The query was not valid (ended up being empty): " + givenQueryString );
- return new HashSet<>();
+ return Collections.emptySet();
}
if ( OntologyServiceImpl.log.isDebugEnabled() ) {
@@ -326,18 +322,18 @@ public Collection findTermsInexact( String givenQuery
Set<CharacteristicValueObject> ontologySearchResults = new HashSet<>();
ontologySearchResults.addAll( searchInThreads( service -> {
Collection<OntologyTerm> results2;
- results2 = service.findTerm( queryString );
+ results2 = ontologyCache.findTerm( service, queryString );
if ( results2.isEmpty() )
return Collections.emptySet();
return CharacteristicValueObject.characteristic2CharacteristicVO( this.termsToCharacteristics( results2 ) );
- } ) );
+ }, queryString ) );
// get GO terms, if we don't already have a lot of possibilities. (might have to adjust this)
StopWatch findGoTerms = StopWatch.createStarted();
if ( geneOntologyService.isOntologyLoaded() ) {
try {
ontologySearchResults.addAll( CharacteristicValueObject.characteristic2CharacteristicVO(
- this.termsToCharacteristics( geneOntologyService.findTerm( queryString ) ) ) );
+ this.termsToCharacteristics( ontologyCache.findTerm( geneOntologyService, queryString ) ) ) );
} catch ( OntologySearchException e ) {
throw new BaseCodeOntologySearchException( e );
}
@@ -368,9 +364,9 @@ public Collection findTermsInexact( String givenQuery
countOccurrencesTimerAfter.stop();
// Sort the results rather elaborately.
- Collection<CharacteristicValueObject> sortedResults = results.values().stream()
+ LinkedHashSet<CharacteristicValueObject> sortedResults = results.values().stream()
.sorted( getCharacteristicComparator( queryString ) )
- .collect( Collectors.toList() );
+ .collect( Collectors.toCollection( LinkedHashSet::new ) );
watch.stop();
@@ -388,35 +384,69 @@ public Collection findTermsInexact( String givenQuery
@Override
public Set<OntologyTerm> getParents( Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties ) {
- Set<OntologyTerm> toQuery = new HashSet<>( terms );
- Set<OntologyTerm> results = new HashSet<>();
- while ( !toQuery.isEmpty() ) {
- Set<OntologyTerm> newResults = combineInThreads( os -> ontologyCache.getParents( os, toQuery, direct, includeAdditionalProperties ) );
- results.addAll( newResults );
- // toQuery = newResults - toQuery
- newResults.removeAll( toQuery );
- toQuery.clear();
- toQuery.addAll( newResults );
- }
- return results;
+ return getParentsOrChildren( terms, direct, includeAdditionalProperties, true );
}
@Override
public Set<OntologyTerm> getChildren( Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties ) {
+ return getParentsOrChildren( terms, direct, includeAdditionalProperties, false );
+ }
+
+ private Set<OntologyTerm> getParentsOrChildren( Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties, boolean parents ) {
+ if ( terms.isEmpty() ) {
+ return Collections.emptySet();
+ }
Set<OntologyTerm> toQuery = new HashSet<>( terms );
- Set<OntologyTerm> results = new HashSet<>();
+ List<OntologyTerm> results = new ArrayList<>();
while ( !toQuery.isEmpty() ) {
- Set<OntologyTerm> newResults = combineInThreads( os -> ontologyCache.getChildren( os, toQuery, direct, includeAdditionalProperties ) );
- results.addAll( newResults );
- newResults.removeAll( toQuery );
- toQuery.clear();
- toQuery.addAll( newResults );
+ List<OntologyTerm> newResults = combineInThreads( os -> {
+ StopWatch timer = StopWatch.createStarted();
+ try {
+ return parents ? ontologyCache.getParents( os, toQuery, direct, includeAdditionalProperties )
+ : ontologyCache.getChildren( os, toQuery, direct, includeAdditionalProperties );
+ } finally {
+ if ( timer.getTime() > Math.max( 10L * terms.size(), 500L ) ) {
+ log.warn( String.format( "Obtaining %s from %s for %s took %d ms",
+ parents ? "parents" : "children",
+ os,
+ terms.size() == 1 ? terms.iterator().next() : terms.size() + " terms",
+ timer.getTime() ) );
+ }
+ }
+ }, String.format( "%s %s of %d terms", direct ? "direct" : "all", parents ? "parents" : "children", terms.size() ) );
+
+ if ( results.addAll( newResults ) && !direct ) {
+ // there are new results (i.e. a term was inferred from a different ontology), we need to requery them
+ // if they were not in the query
+ newResults.removeAll( toQuery );
+ toQuery.clear();
+ toQuery.addAll( newResults );
+ log.debug( String.format( "Found %d new %s terms, will requery them.", newResults.size(),
+ parents ? "parents" : "children" ) );
+ } else {
+ toQuery.clear();
+ }
}
- return results;
+
+ // when an ontology returns a result without a label, it might be referring to another ontology, so we attempt
+ // to retrieve a result with a label as a replacement
+ Set<String> resultsWithMissingLabels = results.stream()
+ .filter( t -> t.getLabel() == null )
+ .map( OntologyResource::getUri )
+ .collect( Collectors.toSet() );
+ if ( !resultsWithMissingLabels.isEmpty() ) {
+ Set<OntologyTerm> replacements = getTerms( resultsWithMissingLabels );
+ results.removeAll( replacements );
+ results.addAll( replacements );
+ }
+
+ // drop terms without labels
+ results.removeIf( t -> t.getLabel() == null );
+ return new HashSet<>( results );
}
@Override
- public Collection<OntologyTerm> getCategoryTerms() {
+ public Set<OntologyTerm> getCategoryTerms() {
return categoryTerms.stream()
.map( term -> {
String termUri = term.getUri();
@@ -436,25 +466,19 @@ public Collection getCategoryTerms() {
@Override
- public Collection<OntologyTerm> getRelationTerms() {
+ public Set<OntologyTerm> getRelationTerms() {
// FIXME: it's not quite like categoryTerms so this map operation is probably not needed at all, the relations don't come from any particular ontology
- return relationTerms.stream()
- .map( term -> {
- return term;
- } )
- .collect( Collectors.toSet() );
+ return Collections.unmodifiableSet( relationTerms );
}
@Override
public String getDefinition( String uri ) {
- if ( uri == null ) return null;
OntologyTerm ot = this.getTerm( uri );
if ( ot != null ) {
- for ( AnnotationProperty ann : ot.getAnnotations() ) {
- // FIXME: not clear this will work with all ontologies. UBERON, HP, MP, MONDO does it this way.
- if ( "http://purl.obolibrary.org/obo/IAO_0000115".equals( ann.getUri() ) ) {
- return ann.getContents();
- }
+ // FIXME: not clear this will work with all ontologies. UBERON, HP, MP, MONDO does it this way.
+ AnnotationProperty annot = ot.getAnnotation( "http://purl.obolibrary.org/obo/IAO_0000115" );
+ if ( annot != null ) {
+ return annot.getContents();
}
}
return null;
@@ -464,46 +488,31 @@ public String getDefinition( String uri ) {
public OntologyTerm getTerm( String uri ) {
return findFirst( ontology -> {
OntologyTerm term = ontology.getTerm( uri );
- // some terms mentioned, but not declared in some ontologies (see https://github.com/PavlidisLab/Gemma/issues/998)
- // FIXME: baseCode should return null if there is no label, not default to the local name or URI
- if ( term != null && ( term.getLabel() == null || term.getLabel().equals( term.getUri() ) ) ) {
+ if ( term != null && term.getLabel() == null ) {
return null;
}
return term;
- } );
+ }, uri );
}
@Override
public Set<OntologyTerm> getTerms( Collection<String> uris ) {
Set<String> distinctUris = uris instanceof Set ? ( Set<String> ) uris : new HashSet<>( uris );
- return combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ) );
- }
-
- /**
- * @return true if the Uri is an ObsoleteClass. This will only work if the ontology in question is loaded.
- */
- @Override
- public boolean isObsolete( String uri ) {
- if ( uri == null )
- return false;
- OntologyTerm t = this.getTerm( uri );
- return t != null && t.isObsolete();
+ List<OntologyTerm> results = combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ),
+ String.format( "terms for %d URIs", uris.size() ) );
+ results.removeIf( t -> t.getLabel() == null );
+ return new HashSet<>( results );
}
@Override
public void reindexAllOntologies() {
for ( ubic.basecode.ontology.providers.OntologyService serv : this.ontologyServices ) {
- if ( serv.isOntologyLoaded() ) {
- OntologyServiceImpl.log.info( "Reindexing: " + serv );
- try {
+ if ( serv.isEnabled() && serv.isSearchEnabled() ) {
+ ontologyTaskExecutor.execute( () -> {
+ OntologyServiceImpl.log.info( "Reindexing " + serv + "..." );
serv.index( true );
- } catch ( Exception e ) {
- OntologyServiceImpl.log.error( "Failed to index " + serv + ": " + e.getMessage(), e );
- }
- } else {
- if ( serv.isEnabled() )
- OntologyServiceImpl.log
- .info( "Not available for reindexing (not enabled or finished initialization): " + serv );
+ ontologyCache.clearSearchCacheByOntology( serv );
+ } );
}
}
}
@@ -511,10 +520,19 @@ public void reindexAllOntologies() {
@Override
public void reinitializeAndReindexAllOntologies() {
for ( ubic.basecode.ontology.providers.OntologyService serv : this.ontologyServices ) {
- ontologyTaskExecutor.execute( () -> {
- serv.initialize( true, true );
- ontologyCache.clearByOntology( serv );
- } );
+ if ( serv.isOntologyLoaded() ) {
+ if ( serv.isEnabled() ) {
+ boolean isSearchEnabled = serv.isSearchEnabled();
+ ontologyTaskExecutor.execute( () -> {
+ OntologyServiceImpl.log.info( "Reinitializing " + serv + "..." );
+ serv.initialize( true, isSearchEnabled );
+ ontologyCache.clearByOntology( serv );
+ if ( isSearchEnabled ) {
+ ontologyCache.clearSearchCacheByOntology( serv );
+ }
+ } );
+ }
+ }
}
}
@@ -566,20 +584,17 @@ private Characteristic termToCharacteristic( OntologyTerm res ) {
}
@Override
- public Map<String, CharacteristicValueObject> findObsoleteTermUsage() {
- Map<String, CharacteristicValueObject> vos = new HashMap<>();
-
- int start = 0;
- int step = 5000;
+ public Map<Characteristic, Long> findObsoleteTermUsage() {
+ Map<Characteristic, Long> results = new HashMap<>();
int prevObsoleteCnt = 0;
int checked = 0;
- CharacteristicValueObject lastObsolete = null;
-
- while ( true ) {
+ Characteristic lastObsolete = null;
+ long total = characteristicService.countAll();
+ int step = 5000;
+ for ( int start = 0; ; start += step ) {
Collection<Characteristic> chars = characteristicService.browse( start, step );
- start += step;
if ( chars == null || chars.isEmpty() ) {
break;
@@ -593,35 +608,30 @@ public Map findObsoleteTermUsage() {
checked++;
- if ( this.getTerm( valueUri ) == null || this.isObsolete( valueUri ) ) {
-
+ OntologyTerm term = this.getTerm( valueUri );
+ if ( term != null && term.isObsolete() ) {
if ( valueUri.startsWith( "http://purl.org/commons/record/ncbi_gene" ) || valueUri.startsWith( "http://purl.obolibrary.org/obo/GO_" ) ) {
// these are false positives, they aren't in an ontology, and we aren't looking at GO Terms.
continue;
}
-
-
- if ( !vos.containsKey( valueUri ) ) {
- vos.put( valueUri, new CharacteristicValueObject( ch ) );
- }
- vos.get( valueUri ).incrementOccurrenceCount();
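+ // tally the number of times each obsolete characteristic is used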
+ results.compute( ch, ( k, v ) -> v == null ? 1L : v + 1L );
if ( log.isDebugEnabled() )
OntologyServiceImpl.log.debug( "Found obsolete or missing term: " + ch.getValue() + " - " + valueUri );
- lastObsolete = vos.get( valueUri );
+ lastObsolete = ch;
}
}
- if ( vos.size() > prevObsoleteCnt ) {
- OntologyServiceImpl.log.info( "Found " + vos.size() + " obsolete or missing terms so far, tested " + checked + " characteristics" );
+ if ( results.size() > prevObsoleteCnt ) {
+ OntologyServiceImpl.log.info( "Found " + results.size() + " obsolete or missing terms so far, tested " + checked + " out of " + total + " characteristics" );
OntologyServiceImpl.log.info( "Last obsolete term seen: " + lastObsolete.getValue() + " - " + lastObsolete.getValueUri() );
}
- prevObsoleteCnt = vos.size();
+ prevObsoleteCnt = results.size();
}
- OntologyServiceImpl.log.info( "Done, obsolete or missing terms found: " + vos.size() );
+ OntologyServiceImpl.log.info( "Done, obsolete or missing terms found: " + results.size() );
- return vos;
+ return results;
}
private void searchForCharacteristics( String queryString, Map<String, CharacteristicValueObject> previouslyUsedInSystem ) {
@@ -680,7 +690,7 @@ private Collection<CharacteristicValueObject> findCharacteristicsFromOntology( S
}
return searchInThreads( ontologyService -> {
- Collection<OntologyTerm> ontologyTerms = ontologyService.findTerm( searchQuery );
+ Collection<OntologyTerm> ontologyTerms = ontologyCache.findTerm( ontologyService, searchQuery );
Collection<CharacteristicValueObject> characteristicsFromOntology = new HashSet<>();
for ( OntologyTerm ontologyTerm : ontologyTerms ) {
// if the ontology term wasnt already found in the database
@@ -694,14 +704,7 @@ private Collection findCharacteristicsFromOntology( S
}
}
return characteristicsFromOntology;
- }, ontologyServicesToUse );
- }
-
- private String foundValueKey( Characteristic c ) {
- if ( StringUtils.isNotBlank( c.getValueUri() ) ) {
- return c.getValueUri().toLowerCase();
- }
- return c.getValue().toLowerCase();
+ }, ontologyServicesToUse, "terms matching " + searchQuery );
}
/**
@@ -846,24 +849,25 @@ static Comparator<CharacteristicValueObject> getCharacteristicComparator( String
.thenComparing( CharacteristicValueObject::getNumTimesUsed, Comparator.reverseOrder() ) // most frequently used first
.thenComparing( CharacteristicValueObject::isAlreadyPresentInDatabase, Comparator.reverseOrder() ) // already used terms first
.thenComparing( c -> c.getValue() != null ? c.getValue().length() : null, Comparator.nullsLast( Comparator.naturalOrder() ) ); // shorter term first
-
}
/**
* Find the first non-null result among loaded ontology services.
*/
@Nullable
- private <T> T findFirst( Function<ubic.basecode.ontology.providers.OntologyService, T> function ) {
+ private <T> T findFirst( Function<ubic.basecode.ontology.providers.OntologyService, T> function, String query ) {
List<Future<T>> futures = new ArrayList<>( ontologyServices.size() );
+ List