diff --git a/.editorconfig b/.editorconfig index 5933aeedee..422f746882 100644 --- a/.editorconfig +++ b/.editorconfig @@ -4,9 +4,6 @@ root = true indent_style=space indent_size=4 -[init-indices.sql] -indent_size=2 - [/pom.xml] indent_style=tab diff --git a/.idea/runConfigurations/Deploy__dev_.xml b/.idea/runConfigurations/Deploy__dev_.xml new file mode 100644 index 0000000000..3e16af9e38 --- /dev/null +++ b/.idea/runConfigurations/Deploy__dev_.xml @@ -0,0 +1,21 @@ + + + + \ No newline at end of file diff --git a/.idea/runConfigurations/Deploy__staging_.xml b/.idea/runConfigurations/Deploy__staging_.xml new file mode 100644 index 0000000000..d834542fa1 --- /dev/null +++ b/.idea/runConfigurations/Deploy__staging_.xml @@ -0,0 +1,21 @@ + + + + \ No newline at end of file diff --git a/gemma-cli/pom.xml b/gemma-cli/pom.xml index eba8b0470c..3b55dacd79 100644 --- a/gemma-cli/pom.xml +++ b/gemma-cli/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.2 + 1.31.3 4.0.0 gemma-cli diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java index c0c6075aa2..d25cfc1d96 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java @@ -73,7 +73,7 @@ protected void buildOptions( Options options ) { .desc( "Threshold (0-1.0) for acceptance of BLAT alignments [Default = " + this.blatScoreThreshold + "]" ) .longOpt( "scoreThresh" ) - .type( Double.class ) + .type( Number.class ) .build(); options.addOption( Option.builder( "sensitive" ).desc( "Run on more sensitive server, if available" ).build() ); @@ -107,7 +107,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException { // } if ( commandLine.hasOption( 's' ) ) { - this.blatScoreThreshold = ( Double ) commandLine.getParsedOptionValue( 's' ); + this.blatScoreThreshold = ( ( Number ) commandLine.getParsedOptionValue( 's' ) 
).doubleValue(); } TaxonService taxonService = this.getBean( TaxonService.class ); diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java index 7f0ba64765..2bc30977cd 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java @@ -87,17 +87,17 @@ protected void buildOptions( Options options ) { super.buildOptions( options ); options.addOption( Option.builder( "i" ).hasArg().argName( "value" ) - .type( Double.class ) + .type( Number.class ) .desc( "Sequence identity threshold, default = " + ProbeMapperConfig.DEFAULT_IDENTITY_THRESHOLD ) .longOpt( "identityThreshold" ).build() ); options.addOption( Option.builder( "s" ).hasArg().argName( "value" ) - .type( Double.class ) + .type( Number.class ) .desc( "Blat score threshold, default = " + ProbeMapperConfig.DEFAULT_SCORE_THRESHOLD ) .longOpt( "scoreThreshold" ).build() ); options.addOption( Option.builder( "o" ).hasArg().argName( "value" ) - .type( Double.class ) + .type( Number.class ) .desc( "Minimum fraction of probe overlap with exons, default = " + ProbeMapperConfig.DEFAULT_MINIMUM_EXON_OVERLAP_FRACTION ) .longOpt( "overlapThreshold" ) .build() ); @@ -234,7 +234,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException { } if ( commandLine.hasOption( 's' ) ) { - blatScoreThreshold = ( Double ) commandLine.getParsedOptionValue( 's' ); + blatScoreThreshold = ( ( Number ) commandLine.getParsedOptionValue( 's' ) ).doubleValue(); if ( blatScoreThreshold < 0 || blatScoreThreshold > 1 ) { throw new IllegalArgumentException( "BLAT score threshold must be between 0 and 1" ); } @@ -249,14 +249,14 @@ protected void processOptions( CommandLine commandLine ) throws ParseException { this.mirnaOnlyModeOption = commandLine.hasOption( ArrayDesignProbeMapperCli.MIRNA_ONLY_MODE_OPTION ); 
if ( commandLine.hasOption( 'i' ) ) { - identityThreshold = ( Double ) commandLine.getParsedOptionValue( 'i' ); + identityThreshold = ( ( Number ) commandLine.getParsedOptionValue( 'i' ) ).doubleValue(); if ( identityThreshold < 0 || identityThreshold > 1 ) { throw new IllegalArgumentException( "Identity threshold must be between 0 and 1" ); } } if ( commandLine.hasOption( 'o' ) ) { - overlapThreshold = ( Double ) commandLine.getParsedOptionValue( 'o' ); + overlapThreshold = ( ( Number ) commandLine.getParsedOptionValue( 'o' ) ).doubleValue(); if ( overlapThreshold < 0 || overlapThreshold > 1 ) { throw new IllegalArgumentException( "Overlap threshold must be between 0 and 1" ); } diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java index effbe261f3..e7efebba37 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java @@ -8,7 +8,7 @@ import org.springframework.core.task.AsyncTaskExecutor; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.util.AbstractCLI; -import ubic.gemma.model.common.description.CharacteristicValueObject; +import ubic.gemma.model.common.description.Characteristic; import java.util.LinkedHashMap; import java.util.List; @@ -90,14 +90,13 @@ protected void doWork() throws Exception { log.info( "Ontologies warmed up, starting check..." ); - Map vos = ontologyService.findObsoleteTermUsage(); + Map vos = ontologyService.findObsoleteTermUsage(); AbstractCLI.log.info( "Obsolete term check finished, printing ..." 
); System.out.println( "Value\tValueUri\tCount" ); - for ( CharacteristicValueObject vo : vos.values() ) { - System.out.println( vo.getValue() + "\t" + vo.getValueUri() + "\t" + vo.getNumTimesUsed() ); + for ( Map.Entry vo : vos.entrySet() ) { + System.out.println( vo.getKey().getValue() + "\t" + vo.getKey().getValueUri() + "\t" + vo.getValue() ); } - } } diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java index 94a9e68c4c..f4a5cb0b70 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java @@ -4,6 +4,7 @@ import org.apache.commons.cli.Options; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; import ubic.gemma.core.search.IndexerService; import ubic.gemma.core.util.AbstractCLI; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; @@ -17,26 +18,26 @@ import ubic.gemma.model.genome.gene.GeneSet; import java.io.File; +import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.stream.Collectors; +@Component public class IndexGemmaCLI extends AbstractCLI { - private static final String THREADS_OPTION = "threads"; - /** * A list of all searchable entities this CLI supports. 
*/ private static final IndexableEntity[] indexableEntities = { - new IndexableEntity( "g", "genes", Gene.class ), - new IndexableEntity( "e", "datasets", ExpressionExperiment.class ), - new IndexableEntity( "a", "platforms", ArrayDesign.class ), - new IndexableEntity( "b", "bibliographic references", BibliographicReference.class ), - new IndexableEntity( "s", "probes", CompositeSequence.class ), - new IndexableEntity( "q", "sequences", BioSequence.class ), - new IndexableEntity( "x", "datasets groups", ExpressionExperimentSet.class ), - new IndexableEntity( "y", "gene sets", GeneSet.class ) + new IndexableEntity( "g", "genes", Gene.class, 1000 ), + new IndexableEntity( "e", "datasets", ExpressionExperiment.class, 1000 ), + new IndexableEntity( "a", "platforms", ArrayDesign.class, 100 ), + new IndexableEntity( "b", "bibliographic references", BibliographicReference.class, 1000 ), + new IndexableEntity( "s", "probes", CompositeSequence.class, 100000 ), + new IndexableEntity( "q", "sequences", BioSequence.class, 100000 ), + new IndexableEntity( "x", "datasets groups", ExpressionExperimentSet.class, 100 ), + new IndexableEntity( "y", "gene sets", GeneSet.class, 10 ) }; @lombok.Value @@ -44,6 +45,7 @@ private static class IndexableEntity { String option; String description; Class clazz; + int loggingFrequency; } @Autowired @@ -52,8 +54,7 @@ private static class IndexableEntity { @Value("${gemma.search.dir}") private File searchDir; - private final Set> classesToIndex = new HashSet<>(); - private int numThreads; + private final Set classesToIndex = new HashSet<>(); @Override public String getCommandName() { @@ -82,21 +83,28 @@ protected void buildOptions( Options options ) { protected void processOptions( CommandLine commandLine ) { for ( IndexableEntity ie : indexableEntities ) { if ( commandLine.hasOption( ie.option ) ) { - classesToIndex.add( ie.clazz ); + classesToIndex.add( ie ); } } + if ( classesToIndex.isEmpty() ) { + classesToIndex.addAll( Arrays.asList( 
indexableEntities ) ); + } + indexerService.setNumThreads( getNumThreads() ); } @Override protected void doWork() throws Exception { - if ( classesToIndex.isEmpty() ) { - log.info( String.format( "All entities will be indexed under %s.", searchDir.getAbsolutePath() ) ); - indexerService.index( getNumThreads() ); - } else { + if ( classesToIndex.size() < indexableEntities.length ) { log.info( String.format( "The following entities will be indexed under %s:\n\t%s", searchDir.getAbsolutePath(), - classesToIndex.stream().map( Class::getName ).collect( Collectors.joining( "\n\t" ) ) ) ); - indexerService.index( classesToIndex, getNumThreads() ); + classesToIndex.stream().map( IndexableEntity::getClazz ).map( Class::getName ).collect( Collectors.joining( "\n\t" ) ) ) ); + } else { + log.info( String.format( "All entities will be indexed under %s.", searchDir.getAbsolutePath() ) ); + } + for ( IndexableEntity classToIndex : classesToIndex ) { + log.info( "Indexing " + classToIndex.getClazz().getName() + "..." ); + indexerService.setLoggingFrequency( classToIndex.loggingFrequency ); + indexerService.index( classToIndex.clazz ); } } } diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java index 7f965191b3..8990039f6b 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java @@ -291,7 +291,7 @@ protected void buildOptions( Options options ) { .build(); options.addOption( chooseCutOption ); - options.addOption( Option.builder( "probeDegreeLim" ).hasArg().type( Integer.class ).build() ); + options.addOption( Option.builder( "probeDegreeLim" ).hasArg().type( Number.class ).build() ); // finer-grained control is possible, of course. 
Option skipQC = Option.builder( "noqc" ) @@ -427,7 +427,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException { } if ( commandLine.hasOption( "probeDegreeLim" ) ) { - this.linkAnalysisConfig.setProbeDegreeThreshold( ( Integer ) commandLine.getParsedOptionValue( "probeDegreeLim" ) ); + this.linkAnalysisConfig.setProbeDegreeThreshold( ( ( Number ) commandLine.getParsedOptionValue( "probeDegreeLim" ) ).intValue() ); } } diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java new file mode 100644 index 0000000000..9a457755c3 --- /dev/null +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java @@ -0,0 +1,52 @@ +package ubic.gemma.core.apps; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.springframework.beans.factory.annotation.Autowired; +import ubic.gemma.core.util.AbstractAuthenticatedCLI; +import ubic.gemma.persistence.service.TableMaintenanceUtil; + +import javax.annotation.Nullable; + +public class UpdateEE2CCli extends AbstractAuthenticatedCLI { + + private static final String TRUNCATE_OPTION = "truncate"; + + @Autowired + private TableMaintenanceUtil tableMaintenanceUtil; + + private boolean truncate; + + @Override + protected void buildOptions( Options options ) { + options.addOption( TRUNCATE_OPTION, "truncate", false, "Truncate the table before updating it" ); + } + + @Override + protected void processOptions( CommandLine commandLine ) throws ParseException { + truncate = commandLine.hasOption( TRUNCATE_OPTION ); + } + + @Override + protected void doWork() throws Exception { + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( truncate ); + } + + @Nullable + @Override + public String getCommandName() { + return "updateEe2c"; + } + + @Nullable + @Override + public String getShortDesc() { + return "Update the 
EXPRESSION_EXPERIMENT2CHARACTERISTIC table"; + } + + @Override + public GemmaCLI.CommandGroup getCommandGroup() { + return GemmaCLI.CommandGroup.EXPERIMENT; + } +} diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java index 2dff2278ea..ab56b5e2da 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java @@ -273,7 +273,7 @@ protected void addDateOption( Options options ) { protected void addThreadsOption( Options options ) { options.addOption( Option.builder( THREADS_OPTION ).argName( "numThreads" ).hasArg() .desc( "Number of threads to use for batch processing." ) - .type( Integer.class ) + .type( Number.class ) .build() ); } @@ -349,7 +349,7 @@ protected void processStandardOptions( CommandLine commandLine ) throws ParseExc this.autoSeek = commandLine.hasOption( AbstractCLI.AUTO_OPTION_NAME ); if ( commandLine.hasOption( THREADS_OPTION ) ) { - this.numThreads = ( Integer ) commandLine.getParsedOptionValue( THREADS_OPTION ); + this.numThreads = ( ( Number ) commandLine.getParsedOptionValue( THREADS_OPTION ) ).intValue(); if ( this.numThreads < 1 ) { throw new IllegalArgumentException( "Number of threads must be greater than 1." 
) ; } diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml index 31a8933710..d4e5be3677 100644 --- a/gemma-core/pom.xml +++ b/gemma-core/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.2 + 1.31.3 4.0.0 gemma-core @@ -216,8 +216,9 @@ ${project.build.directory}/schema/gemma/gsec/sql/gsec-acl-ddl.sql ${project.build.directory}/schema/gemma/gsec/sql/init-acl-indices.sql ${project.basedir}/src/main/resources/sql/init-acls.sql - ${project.basedir}/src/main/resources/sql/init-indices.sql ${project.basedir}/src/main/resources/sql/init-entities.sql + ${project.basedir}/src/main/resources/sql/mysql/init-entities.sql + ${project.basedir}/src/main/resources/sql/init-data.sql ${skipIntegrationTests} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java index 618a8dbe83..2f06737a19 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java @@ -133,8 +133,8 @@ private static Collection factorBatchConfoundTest( BioAssaySet ee assert numBioMaterials > 0 : "No biomaterials for " + ef; double p = Double.NaN; - double chiSquare; - int df; + double chiSquare = Double.NaN; + int df = 0; int numBatches = batchFactor.getFactorValues().size(); if ( ExperimentalDesignUtils.isContinuous( ef ) ) { @@ -238,8 +238,14 @@ private static Collection factorBatchConfoundTest( BioAssaySet ee continue; // to the next factor } + /* + * The problem with the chi-square test is it is underpowered and we don't detect perfect confounds + * when the sample size is small e.g. 3 + 3. + * So for small sample sizes we apply some special cases: 1) when we have a 2x2 table and 2) when we have a small number of batches and observations. + * Otherwise we use the chi-square test.
+ */ ChiSquareTest cst = new ChiSquareTest(); - + // initialize this value; we'll use it when my special test doesn't turn up anything. try { chiSquare = cst.chiSquare( finalCounts ); } catch ( IllegalArgumentException e ) { @@ -248,10 +254,49 @@ private static Collection factorBatchConfoundTest( BioAssaySet ee chiSquare = Double.NaN; } + if ( finalCounts.length == 2 && finalCounts[0].length == 2 ) { // treat as odds ratio computation + double numerator = ( double ) finalCounts[0][0] * finalCounts[1][1]; + double denominator = ( double ) finalCounts[0][1] * finalCounts[1][0]; + + // if either value is zero, we have a perfect confound + if ( numerator == 0 || denominator == 0 ) { + chiSquare = Double.POSITIVE_INFINITY; // effectively we shift to fisher's exact test here. + } + + } else if ( numBioMaterials <= 10 && finalCounts.length <= 4 ) { // number of batches and number of samples is small + + // look for pairs of rows and columns where there is only one non-zero value in each, which would be a confound. 
+ for ( int r = 0; r < finalCounts.length; r++ ) { + int numNonzero = 0; + int nonZeroIndex = -1; + for ( int c = 0; c < finalCounts[0].length; c++ ) { + if ( finalCounts[r][c] != 0 ) { + numNonzero++; + nonZeroIndex = c; + } + } + // inspect the column + if ( numNonzero == 1 ) { + int numNonzeroColumnVals = 0; + for ( int r2 = 0; r2 < finalCounts.length; r2++ ) { + if ( finalCounts[r2][nonZeroIndex] != 0 ) { + numNonzeroColumnVals++; + } + } + if ( numNonzeroColumnVals == 1 ) { + chiSquare = Double.POSITIVE_INFINITY; + break; + } + } + } + } + df = ( finalCounts.length - 1 ) * ( finalCounts[0].length - 1 ); ChiSquaredDistribution distribution = new ChiSquaredDistribution( df ); - if ( !Double.isNaN( chiSquare ) ) { + if ( chiSquare == Double.POSITIVE_INFINITY ) { + p = 0.0; + } else if ( !Double.isNaN( chiSquare ) ) { p = 1.0 - distribution.cumulativeProbability( chiSquare ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ArrayDesignReportServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ArrayDesignReportServiceImpl.java index 61b3d626e8..9e08bf31ef 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ArrayDesignReportServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ArrayDesignReportServiceImpl.java @@ -44,7 +44,7 @@ /** * @author jsantos */ -@Component +@Component("arrayDesignReportService") public class ArrayDesignReportServiceImpl implements ArrayDesignReportService { private final static String HOME_DIR = Settings.getString( "gemma.appdata.home" ); private final static Log log = LogFactory.getLog( ArrayDesignReportServiceImpl.class ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java index 07cf730e2c..090a155674 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java @@ -57,7 +57,7 @@ * @author paul * @author klc */ -@Service +@Service("expressionExperimentReportService") public class ExpressionExperimentReportServiceImpl implements ExpressionExperimentReportService, InitializingBean { private static final String NOTE_UPDATED_CONFOUND = "Updated batch confound"; diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/WhatsNewServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/WhatsNewServiceImpl.java index 75f49d531c..fa4c1dcdc6 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/WhatsNewServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/WhatsNewServiceImpl.java @@ -52,7 +52,7 @@ * * @author pavlidis */ -@Component +@Component("whatsNewService") @SuppressWarnings({ "unused", "WeakerAccess" }) // Possible external use public class WhatsNewServiceImpl implements WhatsNewService { diff --git a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java index 36191a53a8..06239e13dc 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java +++ b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java @@ -18,19 +18,16 @@ */ package ubic.gemma.core.externalDb; +import com.zaxxer.hikari.HikariDataSource; import lombok.Getter; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.jdbc.core.JdbcTemplate; -import org.springframework.jdbc.datasource.SimpleDriverDataSource; import ubic.gemma.model.common.description.DatabaseType; import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.genome.Taxon; import 
ubic.gemma.persistence.util.Settings; -import java.sql.Driver; - /** * Perform useful queries against GoldenPath (UCSC) databases. * @@ -57,40 +54,30 @@ public GoldenPath( Taxon taxon ) { private static JdbcTemplate createJdbcTemplateFromConfig( Taxon taxon ) { String host; - int port; - String user; - String password; String databaseName = getDbNameForTaxon( taxon ); - host = Settings.getString( "gemma.goldenpath.db.host" ); - port = Settings.getInt( "gemma.goldenpath.db.port", 3306 ); - - user = Settings.getString( "gemma.goldenpath.db.user" ); - password = Settings.getString( "gemma.goldenpath.db.password" ); - - SimpleDriverDataSource dataSource = new SimpleDriverDataSource(); - String url = "jdbc:mysql://" + host + ":" + port + "/" + databaseName + "?relaxAutoCommit=true&useSSL=false"; + // SimpleDriverDataSource dataSource = new SimpleDriverDataSource(); + HikariDataSource dataSource = new HikariDataSource(); + dataSource.setPoolName( "goldenpath" ); + String driverClassName = Settings.getString( "gemma.goldenpath.db.driver" ); + String url = Settings.getString( "gemma.goldenpath.db.url" ); + String user = Settings.getString( "gemma.goldenpath.db.user" ); + String password = Settings.getString( "gemma.goldenpath.db.password" ); GoldenPath.log.info( "Connecting to " + databaseName ); GoldenPath.log.debug( "Connecting to Golden Path : " + url + " as " + user ); - String driver = Settings.getString( "gemma.goldenpath.db.driver" ); - if ( StringUtils.isBlank( driver ) ) { - driver = Settings.getString( "gemma.db.driver" ); - GoldenPath.log.warn( "No DB driver configured for GoldenPath, falling back on gemma.db.driver=" + driver ); - } - try { - //noinspection unchecked - dataSource.setDriverClass( ( Class ) Class.forName( driver ) ); - } catch ( ClassNotFoundException e ) { - throw new RuntimeException( e ); - } - dataSource.setUrl( url ); + dataSource.setDriverClassName( driverClassName ); + dataSource.setJdbcUrl( url ); dataSource.setUsername( user ); 
dataSource.setPassword( password ); + dataSource.setMaximumPoolSize( Settings.getInt( "gemma.goldenpath.db.maximumPoolSize" ) ); + dataSource.addDataSourceProperty( "relaxAutoCommit", "true" ); JdbcTemplate jdbcTemplate = new JdbcTemplate( dataSource ); jdbcTemplate.setFetchSize( 50 ); + jdbcTemplate.execute( "use " + databaseName ); + return jdbcTemplate; } diff --git a/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java index 2d5eef3706..a361b3ba9a 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java @@ -216,7 +216,7 @@ public Collection searchGenesAndGeneGroups( String qu // convert result object to a value object List> dbsgvo = taxonCheckedSets.stream() .filter( Objects::nonNull ) - .map( sr -> SearchResult.from( sr, geneSetValueObjectHelper.convertToValueObject( sr.getResultObject() ) ) ) + .map( sr -> sr.withResultObject( geneSetValueObjectHelper.convertToValueObject( sr.getResultObject() ) ) ) .collect( Collectors.toList() ); geneSets = SearchResultDisplayObject.convertSearchResults2SearchResultDisplayObjects( dbsgvo ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java b/gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java new file mode 100644 index 0000000000..1b9778e290 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java @@ -0,0 +1,51 @@ +package ubic.gemma.core.metrics.binder; + +import io.micrometer.core.instrument.Gauge; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.binder.MeterBinder; +import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; + +import javax.annotation.Nullable; 
+import javax.annotation.ParametersAreNonnullByDefault; + +@ParametersAreNonnullByDefault +public class ThreadPoolTaskExecutorMetrics implements MeterBinder { + + private final ThreadPoolTaskExecutor executor; + + @Nullable + private String poolName; + + public ThreadPoolTaskExecutorMetrics( ThreadPoolTaskExecutor executor ) { + this.executor = executor; + } + + @Override + public void bindTo( MeterRegistry registry ) { + String poolName = this.poolName != null ? this.poolName : executor.getThreadNamePrefix(); + Gauge.builder( "threadPool.corePoolSize", executor, ThreadPoolTaskExecutor::getCorePoolSize ) + .description( "Core pool size" ) + .tags( "pool", poolName ) + .register( registry ); + Gauge.builder( "threadPool.maxPoolSize", executor, e -> e.getMaxPoolSize() == Integer.MAX_VALUE ? Double.POSITIVE_INFINITY : e.getMaxPoolSize() ) + .description( "Maximum pool size" ) + .tags( "pool", poolName ) + .register( registry ); + Gauge.builder( "threadPool.poolSize", executor, ThreadPoolTaskExecutor::getPoolSize ) + .description( "Pool size" ) + .tags( "pool", poolName ) + .register( registry ); + Gauge.builder( "threadPool.activeCount", executor, ThreadPoolTaskExecutor::getActiveCount ) + .description( "Number of active threads" ) + .tags( "pool", poolName ) + .register( registry ); + Gauge.builder( "threadPool.queueSize", executor, e -> e.getThreadPoolExecutor().getQueue().size() ) + .description( "Queue size" ) + .tags( "pool", poolName ) + .register( registry ); + } + + public void setPoolName( String poolName ) { + this.poolName = poolName; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java index 1dd2be213c..c33793e7be 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java @@ -16,11 +16,22 
@@ protected AbstractOntologyResourceSimple( @Nullable String uri, String label ) { this.label = label; } + @Override + public String getLocalName() { + return uri; + } + @Override public String getLabel() { return label; } + @Nullable + @Override + public String getComment() { + return null; + } + @Override @Nullable public String getUri() { diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java index 6932d204c9..fd27df8d8f 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java @@ -2,10 +2,13 @@ import lombok.EqualsAndHashCode; import lombok.Value; -import org.apache.commons.math3.util.Combinations; +import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.lang3.time.StopWatch; import org.springframework.cache.Cache; +import org.springframework.util.Assert; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.providers.OntologyService; +import ubic.basecode.ontology.search.OntologySearchException; import ubic.gemma.persistence.util.CacheUtils; import javax.annotation.Nullable; @@ -14,19 +17,49 @@ /** * High-level cache abstraction for retrieving parents and children of a set of terms. *

- * The main approach here for caching is to lookup all the possible {@code k-1} subsets (then {@code k - 2}, - * {@code k - 3}, ...) of a given query and only retrieve the difference from the {@link OntologyService}. + * The main approach here for caching is to enumerate cache keys to find subsets of a given query and only retrieve the + * difference from the {@link OntologyService}. * @author poirigui */ +@CommonsLog class OntologyCache { - private final Cache parentsCache, childrenCache; + private final Cache searchCache, parentsCache, childrenCache; - OntologyCache( Cache parentsCache, Cache childrenCache ) { + private int minSubsetSize = 1; + + OntologyCache( Cache searchCache, Cache parentsCache, Cache childrenCache ) { + this.searchCache = searchCache; this.parentsCache = parentsCache; this.childrenCache = childrenCache; } + /** + * Minimum size of subsets to consider when enumerating cache keys. + */ + void setMinSubsetSize( int minSubsetSize ) { + Assert.isTrue( minSubsetSize > 0 ); + this.minSubsetSize = minSubsetSize; + } + + public Collection findTerm( OntologyService ontology, String query ) throws OntologySearchException { + SearchCacheKey key = new SearchCacheKey( ontology, query ); + + try ( CacheUtils.Lock ignored = CacheUtils.acquireReadLock( searchCache, key ) ) { + Cache.ValueWrapper value = searchCache.get( key ); + if ( value != null ) { + //noinspection unchecked + return ( Collection ) value.get(); + } + } + + try ( CacheUtils.Lock ignored = CacheUtils.acquireWriteLock( searchCache, key ) ) { + Collection results = ontology.findTerm( query ); + searchCache.put( key, results ); + return results; + } + } + /** * Obtain the parents of a given set of terms. */ @@ -41,6 +74,14 @@ Set getChildren( OntologyService os, Collection term return getParentsOrChildren( os, terms, direct, includeAdditionalProperties, childrenCache, false ); } + /** + * Clear the search cache for all entries related to a given ontology service. 
+ * @param serv + */ + public void clearSearchCacheByOntology( OntologyService serv ) { + CacheUtils.evictIf( searchCache, key -> ( ( SearchCacheKey ) key ).getOntologyService().equals( serv ) ); + } + /** * Clear the cache for all entries related to a given ontology service. */ @@ -53,140 +94,102 @@ private Set getParentsOrChildren( OntologyService os, Collection termsSet = new HashSet<>( terms ); - Object key = new ParentsOrChildrenCacheKey( os, termsSet, direct, includeAdditionalProperties ); - Cache.ValueWrapper value = cache.get( key ); - if ( value != null ) { - //noinspection unchecked - return ( Set ) value.get(); - } else { - if ( termsSet.size() > 1 ) { + ParentsOrChildrenCacheKey key = new ParentsOrChildrenCacheKey( os, termsSet, direct, includeAdditionalProperties ); + + // there might be a thread computing this cache entry + long initialLockAcquisitionMs = timer.getTime(); + try ( CacheUtils.Lock ignored = CacheUtils.acquireReadLock( cache, key ) ) { + initialLockAcquisitionMs = timer.getTime() - initialLockAcquisitionMs; + Cache.ValueWrapper value = cache.get( key ); + if ( value != null ) { //noinspection unchecked - HashSet keys = new HashSet<>( ( Collection ) CacheUtils.getKeys( cache ) ); - - // try looking for k-1 or k-2 subsets - ParentsOrChildrenCacheKey keyForSubset = lookupMaximalSubsetByCombination( keys, os, termsSet, direct, includeAdditionalProperties ); - - // try enumerating keys (initially fast, but gets slower as the cache grows) - if ( keyForSubset == null ) { - keyForSubset = lookupMaximalSubsetByEnumeratingKeys( keys, os, termsSet, direct, includeAdditionalProperties ); - } - - if ( keyForSubset != null ) { - Cache.ValueWrapper valueForSubset = cache.get( keyForSubset ); - if ( valueForSubset != null ) { - //noinspection unchecked - Set resultsForSubset = ( Set ) valueForSubset.get(); - // only query the difference - Set remainingTerms = new HashSet<>( termsSet ); - remainingTerms.removeAll( keyForSubset.terms ); - Set 
remainingResults = getParentsOrChildren( os, remainingTerms, direct, includeAdditionalProperties, cache, ancestors ); - // recombine the results - Set results = new HashSet<>( resultsForSubset ); - results.addAll( remainingResults ); - cache.put( key, results ); - return results; - } - } - } - - // no subsets are of any use, so directly query - try ( CacheUtils.Lock ignored = CacheUtils.acquireWriteLock( cache, key ) ) { - // check if the entry have been computed by another thread - value = cache.get( key ); - if ( value != null ) { - //noinspection unchecked - return ( Set ) value.get(); - } - Set newVal = ancestors ? - os.getParents( termsSet, direct, includeAdditionalProperties ) : - os.getChildren( termsSet, direct, includeAdditionalProperties ); - cache.put( key, newVal ); - return newVal; + return ( Set ) value.get(); } } - } - - /** - * A HashSet implementation with a cheap hashCode() operation. - */ - private static class IncrementalHashSet extends HashSet { - - private int hashCode = 0; - - public IncrementalHashSet( Set terms ) { - super( terms ); - } - @Override - public boolean add( T o ) { - if ( !super.add( o ) ) { - hashCode += o.hashCode(); - return true; + long lookupSubsetMs = 0; + ParentsOrChildrenCacheKey keyForSubset; + // enough terms to make it worth looking for subsets... + if ( termsSet.size() >= minSubsetSize + 1 ) { + lookupSubsetMs = timer.getTime(); + keyForSubset = lookupMaximalSubsetByEnumeratingKeys( cache, os, termsSet, direct, includeAdditionalProperties ); + lookupSubsetMs = timer.getTime() - lookupSubsetMs; + if ( lookupSubsetMs > 100 ) { + log.warn( String.format( "Enumerating cache keys for finding a maximal subset for %s of %s took %d ms and %s", + ancestors ? "parents" : "children", key, lookupSubsetMs, keyForSubset != null ? 
"succeeded with " + keyForSubset + " terms" : "failed" ) ); } - return false; + } else { + // we used to enumerate all possible k-1, k-2 subsets, but that's just too slow compared to enumerating + // cache keys, other strategies can be implemented here if necessary + keyForSubset = null; } - @Override - public boolean remove( Object o ) { - if ( !super.remove( o ) ) { - hashCode -= o.hashCode(); - return true; + if ( keyForSubset != null ) { + Cache.ValueWrapper valueForSubset = cache.get( keyForSubset ); + if ( valueForSubset != null ) { + //noinspection unchecked + Set resultsForSubset = ( Set ) valueForSubset.get(); + // only query the difference + Set remainingTerms = new HashSet<>( termsSet ); + remainingTerms.removeAll( keyForSubset.terms ); + Set remainingResults = getParentsOrChildren( os, remainingTerms, direct, includeAdditionalProperties, cache, ancestors ); + // recombine the results + Set results = new HashSet<>( resultsForSubset ); + results.addAll( remainingResults ); + cache.put( key, results ); + return results; + } else { + log.warn( "Missing expected key from the " + ( ancestors ? "parents" : "children" ) + " cache: " + keyForSubset ); } - return false; - } - - @Override - public int hashCode() { - return hashCode; } - } - /** - * Check if a k-1 (or k-2) subset of a given set of terms is in the given cache and query the difference. - *

- * Because the number of subset is exponential in the number of terms, we only try subsets of size 1 and 2 if - * {@code n < 100}. - */ - @Nullable - private ParentsOrChildrenCacheKey lookupMaximalSubsetByCombination( Set keys, OntologyService os, Set terms, boolean direct, boolean includeAdditionalProperties ) { - // we will be generating subsets from this - List orderedTerms = new ArrayList<>( terms ); - // we will be mutating this - Set termsForSubset = new IncrementalHashSet<>( terms ); - // successively try removing k-subsets (k = 1 up to 3); it grows exponentially so careful here! - int n = orderedTerms.size(); - // n = 100 has ~5000 2-combinations - int maxN = n < 100 ? 2 : 1; - // if n = k, there's only one subset, and it's the same case as if no subsets were found - for ( int k = 1; k <= Math.min( n - 1, maxN ); k++ ) { - for ( int[] is : new Combinations( n, k ) ) { - for ( int i : is ) { - termsForSubset.remove( orderedTerms.get( i ) ); - } - // note: ParentsOrChildrenCacheKey is immutable so that the hashCode can be efficiently computed - ParentsOrChildrenCacheKey keyForSubset = new ParentsOrChildrenCacheKey( os, termsForSubset, direct, includeAdditionalProperties ); - if ( keys.contains( keyForSubset ) ) { - return keyForSubset; - } - for ( int i : is ) { - termsForSubset.add( orderedTerms.get( i ) ); - } + long acquireMs = timer.getTime(); + long computingMs = 0; + try ( CacheUtils.Lock ignored = CacheUtils.acquireWriteLock( cache, key ) ) { + acquireMs = timer.getTime() - acquireMs; + // lookup the cache in case another thread computed the result while we were enumerating subsets + Cache.ValueWrapper value = cache.get( key ); + if ( value != null ) { + //noinspection unchecked + return ( Set ) value.get(); + } + computingMs = timer.getTime(); + // no subset found in the cache, just compute it from scratch + Set newVal = ancestors ? 
+ os.getParents( termsSet, direct, includeAdditionalProperties ) : + os.getChildren( termsSet, direct, includeAdditionalProperties ); + computingMs = timer.getTime() - computingMs; + // ignore empty newVal, it might just be that the ontology is not initialized yet + if ( !newVal.isEmpty() && computingMs < lookupSubsetMs ) { + log.warn( String.format( "Computing %d %s terms for %s took less time than looking up subsets, increasing the minSubsetSize might be beneficial", + newVal.size(), + ancestors ? "parents" : "children", + key ) ); + } + cache.put( key, newVal ); + return newVal; + } finally { + if ( timer.getTime() > 500 ) { + log.warn( String.format( "Retrieving %s for %s took %d ms (acquiring locks: %d ms, enumerating subsets: %d ms, computing: %d ms)", + ancestors ? "parents" : "children", key, timer.getTime(), initialLockAcquisitionMs + acquireMs, lookupSubsetMs, computingMs ) ); } } - return null; } /** * Enumerate the cache's keys to find the maximal subset. - *

- * This is less efficient than {@link #lookupMaximalSubsetByCombination(Set, OntologyService, Set, boolean, boolean)} - * because we to verify if a subset exist for each key of the cache. */ @Nullable - private ParentsOrChildrenCacheKey lookupMaximalSubsetByEnumeratingKeys( Collection keys, OntologyService os, Set terms, boolean direct, boolean includeAdditionalProperties ) { - return keys.stream() - .filter( k -> k.ontologyService.equals( os ) && k.direct == direct && k.includeAdditionalProperties == includeAdditionalProperties && terms.containsAll( k.terms ) ) + private ParentsOrChildrenCacheKey lookupMaximalSubsetByEnumeratingKeys( Cache cache, OntologyService os, Set terms, boolean direct, boolean includeAdditionalProperties ) { + return CacheUtils.getKeys( cache ).stream() + .map( o -> ( ParentsOrChildrenCacheKey ) o ) + .filter( k -> k.direct == direct && k.includeAdditionalProperties == includeAdditionalProperties && k.ontologyService.equals( os ) ) + // ignore empty subsets, those will cause an infinite loop + // skip sets which are larger or equal in size, those cannot be subsets + .filter( k -> k.terms.size() >= minSubsetSize && k.terms.size() < terms.size() && terms.containsAll( k.terms ) ) .max( Comparator.comparingInt( k1 -> k1.terms.size() ) ) .orElse( null ); } @@ -194,9 +197,22 @@ private ParentsOrChildrenCacheKey lookupMaximalSubsetByEnumeratingKeys( Collecti @Value @EqualsAndHashCode(cacheStrategy = EqualsAndHashCode.CacheStrategy.LAZY) private static class ParentsOrChildrenCacheKey { - ubic.basecode.ontology.providers.OntologyService ontologyService; + OntologyService ontologyService; Set terms; boolean direct; boolean includeAdditionalProperties; + + @Override + public String toString() { + return String.format( "%d terms from %s [%s] [%s]", terms.size(), ontologyService, + direct ? "direct" : "all", + includeAdditionalProperties ? 
"subClassOf and " + ontologyService.getAdditionalPropertyUris().size() + " additional properties" : "only subClassOf" ); + } + } + + @Value + private static class SearchCacheKey { + OntologyService ontologyService; + String query; } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java index dabd33995f..254cc995e5 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java @@ -18,10 +18,8 @@ import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.search.SearchException; import ubic.gemma.model.common.description.Characteristic; -import ubic.gemma.model.expression.biomaterial.BioMaterial; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.common.description.CharacteristicValueObject; +import ubic.gemma.model.genome.Taxon; import javax.annotation.Nullable; import java.util.Collection; @@ -35,17 +33,13 @@ public interface OntologyService { /** - *

* Locates usages of obsolete terms in Characteristics, ignoring Gene Ontology annotations. Requires the ontologies are loaded into memory. - *

*

- * Will also find terms that are no longer in an ontology we use. - *

- * + * Will also find terms that are no longer in an ontology we use. * @return map of value URI to a representative characteristic using the term. The latter will contain a count - * of how many ocurrences there were. + * of how many occurrences there were. */ - Map findObsoleteTermUsage(); + Map findObsoleteTermUsage(); /** * Using the ontology and values in the database, for a search searchQuery given by the client give an ordered list @@ -55,6 +49,7 @@ public interface OntologyService { * @param useNeuroCartaOntology use neurocarta ontology * @return characteristic vos */ + @Deprecated Collection findExperimentsCharacteristicTags( String searchQuery, boolean useNeuroCartaOntology ) throws SearchException; @@ -63,10 +58,10 @@ Collection findExperimentsCharacteristicTags( String * looks like a URI, it just retrieves the term. * For other queries, this a lucene backed search, is inexact and for general terms can return a lot of results. * - * @param search search + * @param query search query * @return returns a collection of ontologyTerm's */ - Collection findTerms( String search ) throws SearchException; + Collection findTerms( String query ) throws SearchException; /** * Given a search string will first look through the characteristic database for any entries that have a match. If a @@ -82,15 +77,14 @@ Collection findExperimentsCharacteristicTags( String Collection findTermsInexact( String givenQueryString, @Nullable Taxon taxon ) throws SearchException; /** - * @return terms which are allowed for use in the Category of a Characteristic + * Obtain terms which are allowed for use in the category of a {@link ubic.gemma.model.common.description.Characteristic}. */ - Collection getCategoryTerms(); + Set getCategoryTerms(); /** - * - * @return terms allowed for the predicate (relationship) in a Characteristic + * Obtain terms allowed for the predicate (relationship) in a {@link ubic.gemma.model.expression.experiment.Statement}. 
*/ - Collection getRelationTerms(); + Set getRelationTerms(); /** * Obtain the parents of a collection of terms. @@ -105,14 +99,13 @@ Collection findExperimentsCharacteristicTags( String Set getChildren( Collection matchingTerms, boolean direct, boolean includeAdditionalProperties ); /** - * @param uri uri - * @return the definition of the associated OntologyTerm. This requires that the ontology be loaded. + * Obtain a definition for the given URI. */ + @Nullable String getDefinition( String uri ); /** - * @param uri uri - * @return the OntologyTerm for the specified URI. + * Obtain a term for the given URI. */ @Nullable OntologyTerm getTerm( String uri ); @@ -122,8 +115,6 @@ Collection findExperimentsCharacteristicTags( String */ Set getTerms( Collection uris ); - boolean isObsolete( String uri ); - /** * Recreate the search indices, for ontologies that are loaded. */ diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java index 953f5ab519..3bcaadbead 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java @@ -18,6 +18,7 @@ */ package ubic.gemma.core.ontology; +import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; @@ -30,12 +31,11 @@ import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.Resource; import org.springframework.core.task.AsyncTaskExecutor; +import org.springframework.core.task.SimpleAsyncTaskExecutor; import org.springframework.core.task.TaskExecutor; import org.springframework.stereotype.Service; -import ubic.basecode.ontology.model.AnnotationProperty; -import ubic.basecode.ontology.model.OntologyProperty; -import ubic.basecode.ontology.model.OntologyTerm; -import 
ubic.basecode.ontology.model.OntologyTermSimple; +import org.springframework.util.Assert; +import ubic.basecode.ontology.model.*; import ubic.basecode.ontology.providers.ExperimentalFactorOntologyService; import ubic.basecode.ontology.providers.ObiService; import ubic.basecode.ontology.search.OntologySearch; @@ -63,6 +63,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.stream.Collectors; @@ -77,8 +78,8 @@ public class OntologyServiceImpl implements OntologyService, InitializingBean { private static final Log log = LogFactory.getLog( OntologyServiceImpl.class.getName() ); - private static final String + SEARCH_CACHE_NAME = "OntologyService.search", PARENTS_CACHE_NAME = "OntologyService.parents", CHILDREN_CACHE_NAME = "OntologyService.children"; @@ -91,7 +92,7 @@ public class OntologyServiceImpl implements OntologyService, InitializingBean { @Autowired private GeneService geneService; @Autowired - private AsyncTaskExecutor taskExecutor; + private AsyncTaskExecutor taskExecutor = new SimpleAsyncTaskExecutor(); @Autowired private ExperimentalFactorOntologyService experimentalFactorOntologyService; @@ -121,7 +122,7 @@ public class OntologyServiceImpl implements OntologyService, InitializingBean { @Override public void afterPropertiesSet() throws Exception { - ontologyCache = new OntologyCache( cacheManager.getCache( PARENTS_CACHE_NAME ), cacheManager.getCache( CHILDREN_CACHE_NAME ) ); + ontologyCache = new OntologyCache( cacheManager.getCache( SEARCH_CACHE_NAME ), cacheManager.getCache( PARENTS_CACHE_NAME ), cacheManager.getCache( CHILDREN_CACHE_NAME ) ); if ( ontologyServiceFactories != null && autoLoadOntologies ) { List enabledOntologyServices = ontologyServiceFactories.stream() .map( factory -> { @@ -259,14 +260,7 @@ public Collection findTerms( String search ) throws BaseCodeOntolo 
* URI input: just retrieve the term. */ if ( search.startsWith( "http://" ) ) { - return combineInThreads( ontology -> { - OntologyTerm found = ontology.getTerm( search ); - if ( found != null ) { - return Collections.singleton( found ); - } else { - return Collections.emptySet(); - } - } ); + return Collections.singleton( findFirst( ontology -> ontology.getTerm( search ), "terms matching " + search ) ); } Collection results = new HashSet<>(); @@ -280,17 +274,19 @@ public Collection findTerms( String search ) throws BaseCodeOntolo return results; } - results = searchInThreads( ontology -> ontology.findTerm( query ) ); + results = searchInThreads( ontology -> ontologyCache.findTerm( ontology, query ), query ); if ( geneOntologyService.isOntologyLoaded() ) { try { - results.addAll( geneOntologyService.findTerm( search ) ); + results.addAll( ontologyCache.findTerm( geneOntologyService, search ) ); } catch ( OntologySearchException e ) { throw new BaseCodeOntologySearchException( e ); } } - return results; + return results.stream() + .sorted( Comparator.comparing( OntologyTerm::getScore, Comparator.nullsLast( Comparator.reverseOrder() ) ) ) + .collect( Collectors.toCollection( LinkedHashSet::new ) ); } @Override @@ -304,7 +300,7 @@ public Collection findTermsInexact( String givenQuery String queryString = OntologySearch.stripInvalidCharacters( givenQueryString ); if ( StringUtils.isBlank( queryString ) ) { OntologyServiceImpl.log.warn( "The query was not valid (ended up being empty): " + givenQueryString ); - return new HashSet<>(); + return Collections.emptySet(); } if ( OntologyServiceImpl.log.isDebugEnabled() ) { @@ -326,18 +322,18 @@ public Collection findTermsInexact( String givenQuery Set ontologySearchResults = new HashSet<>(); ontologySearchResults.addAll( searchInThreads( service -> { Collection results2; - results2 = service.findTerm( queryString ); + results2 = ontologyCache.findTerm( service, queryString ); if ( results2.isEmpty() ) return 
Collections.emptySet(); return CharacteristicValueObject.characteristic2CharacteristicVO( this.termsToCharacteristics( results2 ) ); - } ) ); + }, queryString ) ); // get GO terms, if we don't already have a lot of possibilities. (might have to adjust this) StopWatch findGoTerms = StopWatch.createStarted(); if ( geneOntologyService.isOntologyLoaded() ) { try { ontologySearchResults.addAll( CharacteristicValueObject.characteristic2CharacteristicVO( - this.termsToCharacteristics( geneOntologyService.findTerm( queryString ) ) ) ); + this.termsToCharacteristics( ontologyCache.findTerm( geneOntologyService, queryString ) ) ) ); } catch ( OntologySearchException e ) { throw new BaseCodeOntologySearchException( e ); } @@ -368,9 +364,9 @@ public Collection findTermsInexact( String givenQuery countOccurrencesTimerAfter.stop(); // Sort the results rather elaborately. - Collection sortedResults = results.values().stream() + LinkedHashSet sortedResults = results.values().stream() .sorted( getCharacteristicComparator( queryString ) ) - .collect( Collectors.toList() ); + .collect( Collectors.toCollection( LinkedHashSet::new ) ); watch.stop(); @@ -388,35 +384,69 @@ public Collection findTermsInexact( String givenQuery @Override public Set getParents( Collection terms, boolean direct, boolean includeAdditionalProperties ) { - Set toQuery = new HashSet<>( terms ); - Set results = new HashSet<>(); - while ( !toQuery.isEmpty() ) { - Set newResults = combineInThreads( os -> ontologyCache.getParents( os, toQuery, direct, includeAdditionalProperties ) ); - results.addAll( newResults ); - // toQuery = newResults - toQuery - newResults.removeAll( toQuery ); - toQuery.clear(); - toQuery.addAll( newResults ); - } - return results; + return getParentsOrChildren( terms, direct, includeAdditionalProperties, true ); } @Override public Set getChildren( Collection terms, boolean direct, boolean includeAdditionalProperties ) { + return getParentsOrChildren( terms, direct, 
includeAdditionalProperties, false ); + } + + private Set getParentsOrChildren( Collection terms, boolean direct, boolean includeAdditionalProperties, boolean parents ) { + if ( terms.isEmpty() ) { + return Collections.emptySet(); + } Set toQuery = new HashSet<>( terms ); - Set results = new HashSet<>(); + List results = new ArrayList<>(); while ( !toQuery.isEmpty() ) { - Set newResults = combineInThreads( os -> ontologyCache.getChildren( os, toQuery, direct, includeAdditionalProperties ) ); - results.addAll( newResults ); - newResults.removeAll( toQuery ); - toQuery.clear(); - toQuery.addAll( newResults ); + List newResults = combineInThreads( os -> { + StopWatch timer = StopWatch.createStarted(); + try { + return parents ? ontologyCache.getParents( os, toQuery, direct, includeAdditionalProperties ) + : ontologyCache.getChildren( os, toQuery, direct, includeAdditionalProperties ); + } finally { + if ( timer.getTime() > Math.max( 10L * terms.size(), 500L ) ) { + log.warn( String.format( "Obtaining %s from %s for %s took %d ms", + parents ? "parents" : "children", + os, + terms.size() == 1 ? terms.iterator().next() : terms.size() + " terms", + timer.getTime() ) ); + } + } + }, String.format( "%s %s of %d terms", direct ? "direct" : "all", parents ? "parents" : "children", terms.size() ) ); + + if ( results.addAll( newResults ) && !direct ) { + // there are new results (i.e. a term was inferred from a different ontology), we need to requery them + // if they were not in the query + newResults.removeAll( toQuery ); + toQuery.clear(); + toQuery.addAll( newResults ); + log.debug( String.format( "Found %d new %s terms, will requery them.", newResults.size(), + parents ? 
"parents" : "children" ) ); + } else { + toQuery.clear(); + } } - return results; + + // when an ontology returns a result without a label, it might be referring to another ontology, so we attempt + // to retrieve a results with a label as a replacement + Set resultsWithMissingLabels = results.stream() + .filter( t -> t.getLabel() == null ) + .map( OntologyResource::getUri ) + .collect( Collectors.toSet() ); + if ( !resultsWithMissingLabels.isEmpty() ) { + Set replacements = getTerms( resultsWithMissingLabels ); + results.removeAll( replacements ); + results.addAll( replacements ); + } + + // drop terms without labels + results.removeIf( t -> t.getLabel() == null ); + return new HashSet<>( results ); } @Override - public Collection getCategoryTerms() { + public Set getCategoryTerms() { return categoryTerms.stream() .map( term -> { String termUri = term.getUri(); @@ -436,25 +466,19 @@ public Collection getCategoryTerms() { @Override - public Collection getRelationTerms() { + public Set getRelationTerms() { // FIXME: it's not quite like categoryTerms so this map operation is probably not needed at all, the relations don't come from any particular ontology - return relationTerms.stream() - .map( term -> { - return term; - } ) - .collect( Collectors.toSet() ); + return Collections.unmodifiableSet( relationTerms ); } @Override public String getDefinition( String uri ) { - if ( uri == null ) return null; OntologyTerm ot = this.getTerm( uri ); if ( ot != null ) { - for ( AnnotationProperty ann : ot.getAnnotations() ) { - // FIXME: not clear this will work with all ontologies. UBERON, HP, MP, MONDO does it this way. - if ( "http://purl.obolibrary.org/obo/IAO_0000115".equals( ann.getUri() ) ) { - return ann.getContents(); - } + // FIXME: not clear this will work with all ontologies. UBERON, HP, MP, MONDO does it this way. 
+ AnnotationProperty annot = ot.getAnnotation( "http://purl.obolibrary.org/obo/IAO_0000115" ); + if ( annot != null ) { + return annot.getContents(); } } return null; @@ -464,46 +488,31 @@ public String getDefinition( String uri ) { public OntologyTerm getTerm( String uri ) { return findFirst( ontology -> { OntologyTerm term = ontology.getTerm( uri ); - // some terms mentioned, but not declared in some ontologies (see https://github.com/PavlidisLab/Gemma/issues/998) - // FIXME: baseCode should return null if there is no , not default the local name or URI - if ( term != null && ( term.getLabel() == null || term.getLabel().equals( term.getUri() ) ) ) { + if ( term != null && term.getLabel() == null ) { return null; } return term; - } ); + }, uri ); } @Override public Set getTerms( Collection uris ) { Set distinctUris = uris instanceof Set ? ( Set ) uris : new HashSet<>( uris ); - return combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ) ); - } - - /** - * @return true if the Uri is an ObsoleteClass. This will only work if the ontology in question is loaded. - */ - @Override - public boolean isObsolete( String uri ) { - if ( uri == null ) - return false; - OntologyTerm t = this.getTerm( uri ); - return t != null && t.isObsolete(); + List results = combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ), + String.format( "terms for %d URIs", uris.size() ) ); + results.removeIf( t -> t.getLabel() == null ); + return new HashSet<>( results ); } @Override public void reindexAllOntologies() { for ( ubic.basecode.ontology.providers.OntologyService serv : this.ontologyServices ) { - if ( serv.isOntologyLoaded() ) { - OntologyServiceImpl.log.info( "Reindexing: " + serv ); - try { + if ( serv.isEnabled() && serv.isSearchEnabled() ) { + ontologyTaskExecutor.execute( () -> { + OntologyServiceImpl.log.info( "Reindexing " + serv + "..." 
); serv.index( true ); - } catch ( Exception e ) { - OntologyServiceImpl.log.error( "Failed to index " + serv + ": " + e.getMessage(), e ); - } - } else { - if ( serv.isEnabled() ) - OntologyServiceImpl.log - .info( "Not available for reindexing (not enabled or finished initialization): " + serv ); + ontologyCache.clearSearchCacheByOntology( serv ); + } ); } } } @@ -511,10 +520,19 @@ public void reindexAllOntologies() { @Override public void reinitializeAndReindexAllOntologies() { for ( ubic.basecode.ontology.providers.OntologyService serv : this.ontologyServices ) { - ontologyTaskExecutor.execute( () -> { - serv.initialize( true, true ); - ontologyCache.clearByOntology( serv ); - } ); + if ( serv.isOntologyLoaded() ) { + if ( serv.isEnabled() ) { + boolean isSearchEnabled = serv.isSearchEnabled(); + ontologyTaskExecutor.execute( () -> { + OntologyServiceImpl.log.info( "Reinitializing " + serv + "..." ); + serv.initialize( true, isSearchEnabled ); + ontologyCache.clearByOntology( serv ); + if ( isSearchEnabled ) { + ontologyCache.clearSearchCacheByOntology( serv ); + } + } ); + } + } } } @@ -566,20 +584,17 @@ private Characteristic termToCharacteristic( OntologyTerm res ) { } @Override - public Map findObsoleteTermUsage() { - Map vos = new HashMap<>(); - - int start = 0; - int step = 5000; + public Map findObsoleteTermUsage() { + Map results = new HashMap<>(); int prevObsoleteCnt = 0; int checked = 0; - CharacteristicValueObject lastObsolete = null; - - while ( true ) { + Characteristic lastObsolete = null; + long total = characteristicService.countAll(); + int step = 5000; + for ( int start = 0; ; start += step ) { Collection chars = characteristicService.browse( start, step ); - start += step; if ( chars == null || chars.isEmpty() ) { break; @@ -593,35 +608,30 @@ public Map findObsoleteTermUsage() { checked++; - if ( this.getTerm( valueUri ) == null || this.isObsolete( valueUri ) ) { - + OntologyTerm term = this.getTerm( valueUri ); + if ( term != null && 
term.isObsolete() ) { if ( valueUri.startsWith( "http://purl.org/commons/record/ncbi_gene" ) || valueUri.startsWith( "http://purl.obolibrary.org/obo/GO_" ) ) { // these are false positives, they aren't in an ontology, and we aren't looking at GO Terms. continue; } - - - if ( !vos.containsKey( valueUri ) ) { - vos.put( valueUri, new CharacteristicValueObject( ch ) ); - } - vos.get( valueUri ).incrementOccurrenceCount(); + results.compute( ch, ( k, v ) -> v == null ? 1L : v + 1L ); if ( log.isDebugEnabled() ) OntologyServiceImpl.log.debug( "Found obsolete or missing term: " + ch.getValue() + " - " + valueUri ); - lastObsolete = vos.get( valueUri ); + lastObsolete = ch; } } - if ( vos.size() > prevObsoleteCnt ) { - OntologyServiceImpl.log.info( "Found " + vos.size() + " obsolete or missing terms so far, tested " + checked + " characteristics" ); + if ( results.size() > prevObsoleteCnt ) { + OntologyServiceImpl.log.info( "Found " + results.size() + " obsolete or missing terms so far, tested " + checked + " out of " + total + " characteristics" ); OntologyServiceImpl.log.info( "Last obsolete term seen: " + lastObsolete.getValue() + " - " + lastObsolete.getValueUri() ); } - prevObsoleteCnt = vos.size(); + prevObsoleteCnt = results.size(); } - OntologyServiceImpl.log.info( "Done, obsolete or missing terms found: " + vos.size() ); + OntologyServiceImpl.log.info( "Done, obsolete or missing terms found: " + results.size() ); - return vos; + return results; } private void searchForCharacteristics( String queryString, Map previouslyUsedInSystem ) { @@ -680,7 +690,7 @@ private Collection findCharacteristicsFromOntology( S } return searchInThreads( ontologyService -> { - Collection ontologyTerms = ontologyService.findTerm( searchQuery ); + Collection ontologyTerms = ontologyCache.findTerm( ontologyService, searchQuery ); Collection characteristicsFromOntology = new HashSet<>(); for ( OntologyTerm ontologyTerm : ontologyTerms ) { // if the ontology term wasnt already found in the 
database @@ -694,14 +704,7 @@ private Collection findCharacteristicsFromOntology( S } } return characteristicsFromOntology; - }, ontologyServicesToUse ); - } - - private String foundValueKey( Characteristic c ) { - if ( StringUtils.isNotBlank( c.getValueUri() ) ) { - return c.getValueUri().toLowerCase(); - } - return c.getValue().toLowerCase(); + }, ontologyServicesToUse, "terms matching " + searchQuery ); } /** @@ -846,24 +849,25 @@ static Comparator getCharacteristicComparator( String .thenComparing( CharacteristicValueObject::getNumTimesUsed, Comparator.reverseOrder() ) // most frequently used first .thenComparing( CharacteristicValueObject::isAlreadyPresentInDatabase, Comparator.reverseOrder() ) // already used terms first .thenComparing( c -> c.getValue() != null ? c.getValue().length() : null, Comparator.nullsLast( Comparator.naturalOrder() ) ); // shorter term first - } /** * Find the first non-null result among loaded ontology services. */ @Nullable - private T findFirst( Function function ) { + private T findFirst( Function function, String query ) { List> futures = new ArrayList<>( ontologyServices.size() ); + List objects = new ArrayList<>( ontologyServices.size() ); ExecutorCompletionService completionService = new ExecutorCompletionService<>( taskExecutor ); for ( ubic.basecode.ontology.providers.OntologyService service : ontologyServices ) { if ( service.isOntologyLoaded() ) { futures.add( completionService.submit( () -> function.apply( service ) ) ); + objects.add( service ); } } try { for ( int i = 0; i < futures.size(); i++ ) { - T result = completionService.take().get(); + T result = pollCompletionService( completionService, "Finding first result for " + query, futures, objects, 1000, TimeUnit.MILLISECONDS, 1.5 ); if ( result != null ) { return result; } @@ -880,30 +884,74 @@ private T findFirst( Function future : futures ) { - future.cancel( true ); - } + cancelRemainingFutures( futures, objects ); + } + } + + @FunctionalInterface + private 
interface SearchFunction { + Collection apply( ubic.basecode.ontology.providers.OntologyService service ) throws OntologySearchException; + } + + /** + * Similar to {@link #combineInThreads(Function, String)}, but also handles {@link OntologySearchException}. + */ + private List searchInThreads( SearchFunction function, String query ) throws BaseCodeOntologySearchException { + return searchInThreads( function, ontologyServices, query ); + } + + private List searchInThreads( SearchFunction function, List ontologyServices, String query ) throws BaseCodeOntologySearchException { + try { + return combineInThreads( os -> { + try { + return function.apply( os ); + } catch ( OntologySearchException e ) { + throw new OntologySearchExceptionWrapper( e ); + } + }, ontologyServices, query ); + } catch ( OntologySearchExceptionWrapper e ) { + throw new BaseCodeOntologySearchException( e.getCause() ); + } + } + + private static class OntologySearchExceptionWrapper extends RuntimeException { + + private final OntologySearchException cause; + + public OntologySearchExceptionWrapper( OntologySearchException e ) { + super( e ); + this.cause = e; + } + + @Override + public synchronized OntologySearchException getCause() { + return cause; } } + private List combineInThreads( Function> work, String query ) { + return combineInThreads( work, ontologyServices, query ); + } + /** * Apply a given function to all the loaded ontology service and combine the results in a set. *

* The functions are evaluated using Gemma's short-lived task executor. */ - private Set combineInThreads( Function> work, List ontologyServices ) { + private List combineInThreads( Function> work, List ontologyServices, String query ) { List>> futures = new ArrayList<>( ontologyServices.size() ); + List objects = new ArrayList<>( ontologyServices.size() ); ExecutorCompletionService> completionService = new ExecutorCompletionService<>( taskExecutor ); for ( ubic.basecode.ontology.providers.OntologyService os : ontologyServices ) { if ( os.isOntologyLoaded() ) { futures.add( completionService.submit( () -> work.apply( os ) ) ); + objects.add( os ); } } - Set children = new HashSet<>(); + List children = new ArrayList<>(); try { for ( int i = 0; i < futures.size(); i++ ) { - children.addAll( completionService.take().get() ); + children.addAll( pollCompletionService( completionService, "Combining all the results for " + query, futures, objects, 1000, TimeUnit.MILLISECONDS, 1.5 ) ); } } catch ( InterruptedException e ) { log.warn( "Current thread was interrupted while waiting, will only return results collected so far.", e ); @@ -916,59 +964,59 @@ private Set combineInThreads( Function> future : futures ) { - future.cancel( true ); - } + cancelRemainingFutures( futures, objects ); } return children; } - private Set combineInThreads( Function> work ) { - return combineInThreads( work, ontologyServices ); - } - - @FunctionalInterface - private interface SearchFunction { - Collection apply( ubic.basecode.ontology.providers.OntologyService service ) throws OntologySearchException; - } - - private Set searchInThreads( SearchFunction function, List ontologyServices ) throws BaseCodeOntologySearchException { - try { - return combineInThreads( os -> { - try { - return function.apply( os ); - } catch ( OntologySearchException e ) { - throw new OntologySearchExceptionWrapper( e ); - } - }, ontologyServices ); - } catch ( OntologySearchExceptionWrapper e ) { - throw new 
BaseCodeOntologySearchException( e.getCause() ); - } - } - /** - * Similar to {@link #combineInThreads(Function)}, but also handles {@link OntologySearchException}. + * Poll the next available future from the given completion service. + * + * @param completionService the completion service to poll from + * @param description a description of the task being waited for logging purposes + * @param futures the list of futures being awaited + * @param objects the list of objects corresponding to the futures for logging purposes + * @param timeout the amount of time to wait for resolving the next available future + * @param exponentialBackoff if the future does not resolve within the timeout, increase it by the given amount */ - private Set searchInThreads( SearchFunction function ) throws BaseCodeOntologySearchException { - return searchInThreads( function, ontologyServices ); + private T pollCompletionService( ExecutorCompletionService completionService, String description, List> futures, List objects, long timeout, TimeUnit timeUnit, double exponentialBackoff ) throws InterruptedException, ExecutionException { + Assert.isTrue( futures.size() == objects.size(), "The number of futures must match the number of descriptive objects." ); + Assert.isTrue( exponentialBackoff >= 1.0, "Exponential backoff factor must be greater or equal to 1." ); + StopWatch timer = StopWatch.createStarted(); + Future future; + double timeoutMs = TimeUnit.MILLISECONDS.convert( timeout, timeUnit ); + // a fuzz factor to prevent concurrent tasks from all timing out at the same time + // up to 10% of the initial timeout + double fuzzyMs = RandomUtils.nextDouble( 0.0, timeoutMs / 10.0 ); + while ( ( future = completionService.poll( ( long ) timeoutMs, timeUnit ) ) == null ) { + long i = futures.stream().filter( Future::isDone ).count(); + log.warn( String.format( "%s is taking too long (%d/%d completed so far, %s elapsed). 
The following are still running:\n\t%s", + description, i, futures.size(), timer, futures.stream() + .filter( f -> !f.isDone() ) + .map( futures::indexOf ) + .map( objects::get ) + .map( Object::toString ) + .collect( Collectors.joining( "\n\t" ) ) ) ); + timeoutMs = ( timeoutMs + fuzzyMs ) * exponentialBackoff; + } + return future.get(); } - private static class OntologySearchExceptionWrapper extends RuntimeException { - - private final OntologySearchException cause; - - public OntologySearchExceptionWrapper( OntologySearchException e ) { - super( e ); - this.cause = e; + /** + * Cancel all the remaining futures, this way if an exception occur, we don't needlessly occupy threads in the pool. + */ + private void cancelRemainingFutures( List> futures, List objects ) { + Assert.isTrue( futures.size() == objects.size(), "The number of futures must match the number of descriptive objects." ); + List incompleteTasks = new ArrayList<>( futures.size() ); + for ( Future future : futures ) { + if ( !future.isDone() ) { + future.cancel( true ); + incompleteTasks.add( objects.get( futures.indexOf( future ) ).toString() ); + } } - - @Override - public synchronized OntologySearchException getCause() { - return cause; + if ( !incompleteTasks.isEmpty() ) { + log.warn( "The following tasks did not have time to reply and were cancelled:\n\t" + + String.join( "\n\t", incompleteTasks ) ); } } - - } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java b/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java index 499aca3250..4991ff9981 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java @@ -4,8 +4,9 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.search.highlight.Formatter; import 
org.apache.lucene.search.highlight.InvalidTokenOffsetsException; -import org.apache.lucene.search.highlight.QueryScorer; +import ubic.gemma.core.search.lucene.LuceneHighlighter; import ubic.gemma.core.search.lucene.SimpleHTMLFormatter; import javax.annotation.Nullable; @@ -13,26 +14,40 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.Set; @CommonsLog -public class DefaultHighlighter implements Highlighter { +public class DefaultHighlighter implements LuceneHighlighter, OntologyHighlighter { + + private final Formatter formatter; + + public DefaultHighlighter() { + this( new SimpleHTMLFormatter() ); + } + + public DefaultHighlighter( Formatter formatter ) { + this.formatter = formatter; + } + + @Override + public Map highlight( String value, String field ) { + return Collections.singletonMap( field, value ); + } @Override public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { - return Collections.emptyMap(); + return Collections.singletonMap( field, termLabel ); } @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return new org.apache.lucene.search.highlight.Highlighter( new SimpleHTMLFormatter(), queryScorer ); + public Formatter getFormatter() { + return formatter; } @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { Map highlights = new HashMap<>(); for ( Fieldable field : document.getFields() ) { - if ( !field.isTokenized() || field.isBinary() || !fields.contains( field.name() ) ) { + if ( !field.isTokenized() || field.isBinary() ) { continue; } try { diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/FieldAwareSearchSource.java 
b/gemma-core/src/main/java/ubic/gemma/core/search/FieldAwareSearchSource.java new file mode 100644 index 0000000000..d1c584f991 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/search/FieldAwareSearchSource.java @@ -0,0 +1,20 @@ +package ubic.gemma.core.search; + +import org.apache.lucene.queryParser.QueryParser; +import ubic.gemma.model.common.Identifiable; +import ubic.gemma.model.common.search.SearchSettings; + +import java.util.Set; + +/** + * Search source that can retrieve results matching specific fields. + * @author poirigui + * @see ubic.gemma.core.search.lucene.LuceneQueryUtils#parseSafely(SearchSettings, QueryParser) + */ +public interface FieldAwareSearchSource extends SearchSource { + + /** + * Obtain a list of fields that can be searched on. + */ + Set getFields( Class entityClass ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java b/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java index ff170ec51a..c8efd6d5ea 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java @@ -1,12 +1,6 @@ package ubic.gemma.core.search; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.search.highlight.QueryScorer; - -import javax.annotation.Nullable; import java.util.Map; -import java.util.Set; /** * Custom highlighter for search results. @@ -16,23 +10,7 @@ public interface Highlighter { /** - * Produce a highlight for a given ontology term. 
- * - * @param termUri a URI for the term or null for a full-text term - * @param termLabel a label for the term - * @param field an object path through which the term was found - * @return a suitable highlight, or null if none is found - */ - Map highlightTerm( @Nullable String termUri, String termLabel, String field ); - - /** - * Obtain a highlighter for Lucene hits to be used with {@link #highlightDocument(Document, org.apache.lucene.search.highlight.Highlighter, Analyzer, Set)}. - */ - @Nullable - org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ); - - /** - * Highlight a given Lucene document. + * Produce a highlight for a given field. */ - Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ); + Map highlight( String value, String field ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/IndexerService.java b/gemma-core/src/main/java/ubic/gemma/core/search/IndexerService.java index ab8da73119..5c549df5b0 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/IndexerService.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/IndexerService.java @@ -2,8 +2,6 @@ import ubic.gemma.model.common.Identifiable; -import java.util.Set; - /** * Indexer service. * @author poirigui @@ -11,15 +9,18 @@ public interface IndexerService { /** - * Index all the searchable entities. - * @param numThreads number of threads to use for loading and indexing + * Index the given class. + * @param classToIndex a set of classes to index + */ + void index( Class classToIndex ); + + /** + * Set the number of threads to use for indexing entities. */ - void index( int numThreads ); + void setNumThreads( int numThreads ); /** - * Index all the given classes. - * @param classesToIndex a set of classes to index - * @param numThreads number of threads to use for loading and indexing + * Set the logging frequency for reporting progress. 
*/ - void index( Set> classesToIndex, int numThreads ); + void setLoggingFrequency( int loggingFrequency ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/IndexerServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/search/IndexerServiceImpl.java index 37c8e8e9f4..c3bf2217a7 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/IndexerServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/IndexerServiceImpl.java @@ -6,35 +6,25 @@ import org.hibernate.search.impl.SimpleIndexingProgressMonitor; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import org.springframework.util.Assert; import ubic.gemma.model.common.Identifiable; -import java.util.Set; - -@Service +@Service("indexerService") public class IndexerServiceImpl implements IndexerService { @Autowired private SessionFactory sessionFactory; - @Override - public void index( int numThreads ) { - doIndex( new Class[0], numThreads ); - } + private int numThreads = 4; + private int loggingFrequency = 1000; @Override - public void index( Set> classesToIndex, int numThreads ) { - if ( classesToIndex.isEmpty() ) { - return; - } - doIndex( classesToIndex.toArray( new Class[0] ), numThreads ); - } - - private void doIndex( Class[] classesToIndex, int numThreads ) { + public void index( Class classToIndex ) { FullTextSession fullTextSession = Search.getFullTextSession( sessionFactory.openSession() ); try { - fullTextSession.createIndexer( classesToIndex ) + fullTextSession.createIndexer( classToIndex ) .threadsToLoadObjects( numThreads ) - .progressMonitor( new SimpleIndexingProgressMonitor( 10000 ) ) + .progressMonitor( new SimpleIndexingProgressMonitor( loggingFrequency ) ) .startAndWait(); } catch ( InterruptedException e ) { Thread.currentThread().interrupt(); @@ -43,4 +33,16 @@ private void doIndex( Class[] classesToIndex, int numThreads ) { fullTextSession.close(); } } + + @Override + public void setNumThreads( 
int numThreads ) { + Assert.isTrue( numThreads > 0, "The number of threads must be strictly positive." ); + this.numThreads = numThreads; + } + + @Override + public void setLoggingFrequency( int loggingFrequency ) { + Assert.isTrue( loggingFrequency > 0, "The logging frequency must be strictly positive." ); + this.loggingFrequency = loggingFrequency; + } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/OntologyHighlighter.java b/gemma-core/src/main/java/ubic/gemma/core/search/OntologyHighlighter.java new file mode 100644 index 0000000000..c6408986af --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/search/OntologyHighlighter.java @@ -0,0 +1,21 @@ +package ubic.gemma.core.search; + +import javax.annotation.Nullable; +import java.util.Map; + +/** + * Highlighter specialized for ontology terms. + * @author poirigui + */ +public interface OntologyHighlighter extends Highlighter { + + /** + * Produce a highlight for a given ontology term. + * + * @param termUri a URI for the term or null for a full-text term + * @param termLabel a label for the term + * @param field an object path through which the term was found + * @return a suitable highlight, or null if none is found + */ + Map highlightTerm( @Nullable String termUri, String termLabel, String field ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/ParseSearchException.java b/gemma-core/src/main/java/ubic/gemma/core/search/ParseSearchException.java new file mode 100644 index 0000000000..a50708197d --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/search/ParseSearchException.java @@ -0,0 +1,34 @@ +package ubic.gemma.core.search; + +import org.apache.lucene.queryParser.ParseException; + +import javax.annotation.Nullable; + +/** + * An exception that indicate that the search query could not be parsed. + *

+ * When that occurs, we typically reattempt to parse the query. + */ +public class ParseSearchException extends SearchException { + + @Nullable + private final ParseSearchException originalParseException; + + public ParseSearchException( String message, Throwable cause ) { + super( message, cause ); + this.originalParseException = null; + } + + public ParseSearchException( String message, Throwable cause, ParseSearchException originalParseException ) { + super( message, cause ); + this.originalParseException = originalParseException; + } + + /** + * The original {@link ParseException} if this query was reattempted. + */ + @Nullable + public ParseSearchException getOriginalParseException() { + return originalParseException; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchResult.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchResult.java index 727e6a1a36..d075b76c05 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchResult.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchResult.java @@ -20,88 +20,59 @@ import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.ToString; +import lombok.RequiredArgsConstructor; +import org.springframework.util.Assert; import ubic.gemma.model.common.Identifiable; -import ubic.gemma.model.common.description.CharacteristicValueObject; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.util.Comparator; import java.util.Map; -import java.util.Objects; +import java.util.stream.Collectors; /** + * Represents an individual search result. + *

+ * Search result minimally have a type and ID and may have their result object populated at a later time via {@link #setResultObject(Identifiable)}. + *

+ * Results have a score and possibly number of highlights. Two results are considered equal if they have the same type + * and ID. You may use a {@link SearchResultSet} to combine results in a sensible way, retaining result objects and + * highlights when a better result is added. * @author paul + * @author poirigui + * @see SearchSource + * @see SearchResultSet */ @Data +@RequiredArgsConstructor @EqualsAndHashCode(of = { "resultType", "resultId" }) -@ToString(of = { "resultType", "resultId", "resultType", "highlights", "score", "source" }) public class SearchResult implements Comparable> { - /** - * Obtain a comparator for this search result. - *

- * Results are compared by {@link #getScore()} in descending order. Note that any search result can be compared - * regardless of their result type or result object. - */ - public static Comparator> getComparator() { - return Comparator.comparing( SearchResult::getScore, Comparator.reverseOrder() ); - } + private static final Comparator> COMPARATOR = Comparator.comparing( SearchResult::getScore, Comparator.reverseOrder() ); /** - * Create a search result whose result class differ from the object. + * Create a search result from a given identifiable entity. *

- * This can be useful if you wrap a proxy, or don't want to expose the object class publicly. For example, our - * {@link ubic.gemma.model.association.phenotype.PhenotypeAssociation} use a {@link CharacteristicValueObject} - * for the result object. + * The result can be cleared later with {@link #clearResultObject()}. */ public static SearchResult from( Class resultType, T entity, double score, @Nullable Map highlights, Object source ) { - if ( entity.getId() == null ) { - throw new IllegalArgumentException( "Entity ID cannot be null." ); - } + Assert.notNull( entity.getId(), "The entity ID cannot be null." ); SearchResult sr = new SearchResult<>( resultType, entity.getId(), score, highlights, source ); sr.setResultObject( entity ); return sr; } - /** - * Shorthand for {@link #from(Class, Identifiable, double, String, Object)} if you don't need to set the score and - * highlighted text. - */ - public static SearchResult from( Class resultType, T entity, double score, Object source ) { - if ( entity.getId() == null ) { - throw new IllegalArgumentException( "Entity ID cannot be null." ); - } - SearchResult sr = new SearchResult<>( resultType, entity.getId(), score, null, source ); - sr.setResultObject( entity ); - return sr; - } - /** * Create a new provisional search result with a result type and ID. + *

+ * The result can be set later with {@link #setResultObject(Identifiable)}. */ public static SearchResult from( Class resultType, long entityId, double score, @Nullable Map highlights, Object source ) { return new SearchResult<>( resultType, entityId, score, highlights, source ); } - public static SearchResult from( Class resultType, long entityId, double score, Object source ) { - return new SearchResult<>( resultType, entityId, score, null, source ); - } - /** - * Create a search result from an existing one, replacing the result object with the target one. - *

- * This is useful if you need to convert the result object (i.e. to a VO) while preserving the metadata (score, - * highlighted text, etc.). - */ - public static SearchResult from( SearchResult original, @Nullable T newResultObject ) { - SearchResult sr = new SearchResult<>( original.resultType, original.resultId, original.score, original.highlights, original.source ); - sr.setResultObject( newResultObject ); - return sr; - } - - /** - * Class of the result, immutable. + * Type of search result, immutable. */ private final Class resultType; @@ -113,7 +84,7 @@ public static SearchResult from( SearchResult ori /** * Result object this search result is referring to. *

- * This can be null, at least initially if the resultClass and objectId are provided. + * This can be null, at least initially if the resultType and resultId are provided. *

* It may also be replaced at a later time via {@link #setResultObject(Identifiable)}. */ @@ -121,17 +92,17 @@ public static SearchResult from( SearchResult ori private T resultObject; /** - * Highlights for this result. - *

- * Keys are fields of {@link T} and values are substrings that matched. + * Score for ranking this result among other results. */ - @Nullable - private Map highlights; + private final double score; /** - * Score for ranking this result among other results. + * Highlights for this result. + *

+ * Keys are fields of {@link T} and values are substrings that were matched. */ - private final double score; + @Nullable + private final Map highlights; /** * Object representing the source of this result object. @@ -140,23 +111,18 @@ public static SearchResult from( SearchResult ori */ private final Object source; - /** - * Placeholder for provisional search results. - *

- * This is used when the class and ID is known beforehand, but the result hasn't been retrieve yet from persistent - * storage. - */ - private SearchResult( Class entityClass, long entityId, double score, @Nullable Map highlights, Object source ) { - this.resultType = entityClass; - this.resultId = entityId; - this.score = score; - this.highlights = highlights; - this.source = source; + @Override + public int compareTo( SearchResult o ) { + return COMPARATOR.compare( this, o ); } @Override - public int compareTo( SearchResult o ) { - return getComparator().compare( this, o ); + public String toString() { + return String.format( "%s Id=%d Score=%.2f%s Source=%s %s", resultType.getSimpleName(), resultId, + score, + highlights != null ? " Highlights=" + highlights.keySet().stream().sorted().collect( Collectors.joining( "," ) ) : "", + source, + resultObject != null ? "[Not Filled]" : "[Filled]" ); } /** @@ -173,15 +139,44 @@ public Long getResultId() { /** * Set the result object. * - * @throws IllegalArgumentException if the provided result object IDs differs from {@link #getResultId()}. + * @throws IllegalArgumentException if the provided result object is null or if its ID differs from {@link #getResultId()}. */ - public void setResultObject( @Nullable T resultObject ) { - if ( resultObject != null && resultObject.getId() == null ) { - throw new IllegalArgumentException( "The result object ID cannot be null." ); + public void setResultObject( T resultObject ) { + Assert.notNull( resultObject, "The result object cannot be null, use clearResultObject() to unset it." ); + Assert.notNull( resultObject.getId(), "The result object ID cannot be null." ); + Assert.isTrue( resultObject.getId().equals( this.resultId ), "The result object cannot be replaced with one that has a different ID." ); + this.resultObject = resultObject; + } + + /** + * Clear the result object. 
+ */ + public void clearResultObject() { + this.resultObject = null; + } + + /** + * Create a search result from an existing one, replacing the result object with the target one. + *

+ * The new result object does not have to be of the same type as the original result object. This is useful if you + * need to convert the result object (i.e. to a VO) while preserving the metadata (score, highlighted text, etc.). + */ + public SearchResult withResultObject( @Nullable S resultObject ) { + SearchResult searchResult = new SearchResult<>( resultType, resultId, score, highlights, source ); + if ( resultObject != null ) { + searchResult.setResultObject( resultObject ); } - if ( resultObject != null && !Objects.equals( resultObject.getId(), this.resultId ) ) { - throw new IllegalArgumentException( "The result object cannot be replaced with one that has a different ID." ); + return searchResult; + } + + /** + * Copy this search result with the given highlights. + */ + public SearchResult withHighlights( Map highlights ) { + SearchResult searchResult = new SearchResult<>( resultType, resultId, score, highlights, source ); + if ( resultObject != null ) { + searchResult.setResultObject( resultObject ); } - this.resultObject = resultObject; + return searchResult; } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchResultSet.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchResultSet.java index e46ac6b4c0..9a689e406a 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchResultSet.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchResultSet.java @@ -1,6 +1,7 @@ package ubic.gemma.core.search; import ubic.gemma.model.common.Identifiable; +import ubic.gemma.model.common.search.SearchSettings; import java.util.AbstractSet; import java.util.HashMap; @@ -12,15 +13,25 @@ *

* If a better result is added to the set, it replaces the existing one. If the original result had a non-null * {@link SearchResult#getResultObject()}, it is transferred over so that it won't need to be filled later on if needed. - * + *

+ * The collection also honor the {@link SearchSettings#getMaxResults()} value, rejecting any new result unless replacing + * an existing one. * @author poirigui */ public class SearchResultSet extends AbstractSet> { + private final SearchSettings settings; + private final Map, SearchResult> results; - public SearchResultSet() { - results = new HashMap<>(); + public SearchResultSet( SearchSettings settings ) { + this.settings = settings; + this.results = new HashMap<>(); + } + + public SearchResultSet( SearchSettings settings, int initialCapacity ) { + this.settings = settings; + this.results = new HashMap<>( initialCapacity ); } @Override @@ -36,22 +47,45 @@ public int size() { @Override public boolean add( SearchResult t ) { SearchResult previousValue = results.get( t ); - if ( previousValue == null || t.getScore() > previousValue.getScore() ) { - results.put( t, t ); - // retain the result object to avoid fetching it again in the future - if ( previousValue != null && previousValue.getResultObject() != null && t.getResultObject() == null ) { - t.setResultObject( previousValue.getResultObject() ); + if ( previousValue == t ) { + // no need to copy or merge anything if its the same object + return false; + } + SearchResult newValue; + boolean replaced; + if ( previousValue == null ) { + if ( settings.getMaxResults() > 0 && size() >= settings.getMaxResults() ) { + // max size reached and not replacing a previous value + return false; + } + newValue = t; + replaced = true; + } else { + if ( t.getScore() > previousValue.getScore() ) { + newValue = t; + replaced = true; + } else { + // new value is unchanged, so treat the passed argument as the previous value for copy-over purposes + newValue = previousValue; + previousValue = t; + replaced = false; + } + // copy-over the previous result object if necessary + if ( previousValue.getResultObject() != null && newValue.getResultObject() == null ) { + newValue = newValue.withResultObject( previousValue.getResultObject() ); 
} - // merge highlights - if ( previousValue != null && previousValue.getHighlights() != null ) { - Map mergedHighlights = new HashMap<>( previousValue.getHighlights() ); - if ( t.getHighlights() != null ) { - mergedHighlights.putAll( t.getHighlights() ); + // merge highlights if necessary + if ( previousValue.getHighlights() != null ) { + if ( newValue.getHighlights() != null ) { + Map mergedHighlights = new HashMap<>( previousValue.getHighlights() ); + mergedHighlights.putAll( newValue.getHighlights() ); + newValue = newValue.withHighlights( mergedHighlights ); + } else { + newValue = newValue.withHighlights( previousValue.getHighlights() ); } - t.setHighlights( mergedHighlights ); } - return true; } - return false; + results.put( newValue, newValue ); + return replaced; } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchService.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchService.java index eb02e60c92..a961b209b8 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchService.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchService.java @@ -43,6 +43,11 @@ interface SearchResultMap { List> toList(); } + /** + * Obtain a list of fields that can be searched on for the given result type. + */ + Set getFields( Class resultType ); + /** * The results are sorted in order of decreasing score, organized by class. The following objects can be searched * for, depending on the configuration of the input object. 
diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java index ad019b97f6..b8c17977c5 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java @@ -24,7 +24,6 @@ import org.apache.commons.collections4.SetUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; -import org.apache.commons.text.StringEscapeUtils; import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; @@ -35,17 +34,15 @@ import org.springframework.transaction.annotation.Transactional; import org.springframework.util.Assert; import org.springframework.util.LinkedMultiValueMap; -import ubic.gemma.core.association.phenotype.PhenotypeAssociationManagerService; import ubic.gemma.core.genome.gene.service.GeneSearchService; -import ubic.gemma.core.genome.gene.service.GeneService; import ubic.gemma.core.search.source.CompositeSearchSource; -import ubic.gemma.core.search.source.DatabaseSearchSource; import ubic.gemma.model.IdentifiableValueObject; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.association.phenotype.PhenotypeAssociation; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.BibliographicReferenceValueObject; +import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.BlacklistedEntity; import ubic.gemma.model.expression.BlacklistedValueObject; @@ -62,10 +59,8 @@ import ubic.gemma.model.genome.gene.GeneSet; import ubic.gemma.model.genome.gene.GeneSetValueObject; import 
ubic.gemma.model.genome.gene.GeneValueObject; -import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; -import ubic.gemma.persistence.service.expression.experiment.BlacklistedEntityService; import ubic.gemma.persistence.service.genome.taxon.TaxonService; import ubic.gemma.persistence.util.EntityUtils; @@ -74,7 +69,8 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -import static ubic.gemma.core.search.source.DatabaseSearchSourceUtils.prepareDatabaseQuery; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.extractTerms; +import static ubic.gemma.core.search.source.DatabaseSearchSource.NCBI_GENE_ID_URI_PREFIX; /** * This service is used for performing searches using free text or exact matches to items in the database. @@ -131,26 +127,12 @@ private void addAll( Collection> search } } - - /** - * Penalty hit for indirect hit (multiplicative). - *

- * For example, if a platform is matched by a gene hit (score = 1.0), the score will be multiplied by this penalty - * (score = 0.8 * 1.0 = 0.8). - */ - private static final double INDIRECT_HIT_PENALTY = 0.8; - - private static final String NCBI_GENE = "ncbi_gene"; - private final Map nameToTaxonMap = new LinkedHashMap<>(); /* sources */ @Autowired - @Qualifier("databaseSearchSource") - private SearchSource databaseSearchSource; - @Autowired - @Qualifier("hibernateSearchSource") - private SearchSource hibernateSearchSource; + private List searchSources; + @Autowired @Qualifier("ontologySearchSource") private SearchSource ontologySearchSource; @@ -161,27 +143,30 @@ private void addAll( Collection> search @Autowired private GeneSearchService geneSearchService; @Autowired - private GeneService geneService; - @Autowired - private PhenotypeAssociationManagerService phenotypeAssociationManagerService; - - // TODO: use services instead of DAO here - @Autowired - private BlacklistedEntityService blacklistedEntityService; - @Autowired private TaxonService taxonService; @Autowired @Qualifier("valueObjectConversionService") private ConversionService valueObjectConversionService; + /** + * Mapping of supported result types to their corresponding VO type. + */ private final Map, Class>> supportedResultTypes = new HashMap<>(); - /** - * A composite search source. + * A composite search source that combines all the search sources. */ - private SearchSource searchSource; + private CompositeSearchSource searchSource; + + @Override + public Set getFields( Class resultType ) { + return searchSources.stream() + .filter( s -> s instanceof FieldAwareSearchSource ) + .map( s -> ( ( FieldAwareSearchSource ) s ).getFields( resultType ) ) + .flatMap( Set::stream ) + .collect( Collectors.toSet() ); + } /* * This is the method used by the main search page. 
@@ -195,16 +180,53 @@ public SearchResultMap search( SearchSettings settings ) throws SearchException StopWatch timer = StopWatch.createStarted(); - SearchResultMapImpl results; - if ( settings.isTermQuery() ) { - // we only attempt an ontology search if the uri looks remotely like a url. - results = this.ontologyUriSearch( settings ); - } else { - results = this.generalSearch( settings ); + // attempt to infer a taxon from the query if missing + if ( settings.getTaxon() == null ) { + settings = settings.withTaxon( inferTaxon( settings ) ); + } + + // If nothing to search return nothing. + if ( StringUtils.isBlank( settings.getQuery() ) ) { + return new SearchResultMapImpl(); + } + + // Get the top N results for each class. + SearchResultMapImpl results = new SearchResultMapImpl(); + // do gene first before we munge the query too much. + if ( settings.hasResultType( Gene.class ) ) { + results.addAll( this.geneSearch( settings ) ); + } + if ( settings.hasResultType( ExpressionExperiment.class ) ) { + results.addAll( this.expressionExperimentSearch( settings ) ); + } + if ( settings.hasResultType( CompositeSequence.class ) ) { + results.addAll( this.compositeSequenceSearch( settings ) ); + } + if ( settings.hasResultType( ArrayDesign.class ) ) { + results.addAll( searchSource.searchArrayDesign( settings ) ); + } + if ( settings.hasResultType( BioSequence.class ) ) { + results.addAll( searchSource.searchBioSequence( settings ) ); + } + if ( settings.hasResultType( Gene.class ) && settings.isUseGo() ) { + results.addAll( this.dbHitsToSearchResult( settings, Gene.class, geneSearchService.getGOGroupGenes( settings.getQuery(), settings.getTaxon() ), + 0.8, Collections.singletonMap( "GO Group", "From GO group" ), "GeneSearchService.getGOGroupGenes" ) ); + } + if ( settings.hasResultType( BibliographicReference.class ) ) { + results.addAll( searchSource.searchBibliographicReference( settings ) ); + } + if ( settings.hasResultType( GeneSet.class ) ) { + results.addAll( 
searchSource.searchGeneSet( settings ) ); + } + if ( settings.hasResultType( ExpressionExperimentSet.class ) ) { + results.addAll( searchSource.searchExperimentSet( settings ) ); + } + if ( settings.hasResultType( BlacklistedEntity.class ) ) { + results.addAll( searchSource.searchBlacklistedEntities( settings ) ); } if ( !settings.isFillResults() ) { - results.forEach( ( k, v ) -> v.forEach( sr -> sr.setResultObject( null ) ) ); + results.forEach( ( k, v ) -> v.forEach( SearchResult::clearResultObject ) ); } if ( !results.isEmpty() ) { @@ -212,7 +234,6 @@ public SearchResultMap search( SearchSettings settings ) throws SearchException } return results; - } /* @@ -226,7 +247,7 @@ public Set> getSupportedResultTypes() { @Override public void afterPropertiesSet() throws Exception { - searchSource = new CompositeSearchSource( Arrays.asList( databaseSearchSource, hibernateSearchSource, ontologySearchSource ) ); + searchSource = new CompositeSearchSource( searchSources ); initializeSupportedResultTypes(); this.initializeNameToTaxonMap(); } @@ -245,7 +266,7 @@ private void initializeSupportedResultTypes() { canConvertFromEntity( e.getKey(), e.getValue() ); canConvertFromId( e.getValue() ); } - // this is a special case because it's non-trivial to perform the conversion + // FIXME: remove this in the 1.32 series, we still allow selecting Phenotypes from the UI supportedResultTypes.put( PhenotypeAssociation.class, CharacteristicValueObject.class ); } @@ -272,7 +293,7 @@ public > SearchResu // null sf a valid state if the original result is provisional, the converter is capable of retrieving the VO by ID T resultObject = searchResult.getResultObject(); //noinspection unchecked - return SearchResult.from( searchResult, ( U ) valueObjectConversionService.convert( + return searchResult.withResultObject( ( U ) valueObjectConversionService.convert( resultObject != null ? 
resultObject : searchResult.getResultId(), supportedResultTypes.get( searchResult.getResultType() ) ) ); } catch ( ConverterNotFoundException e ) { @@ -301,9 +322,7 @@ private List>> loadValueObject List entitiesIds = new ArrayList<>(); List> entitiesVos = new ArrayList<>(); for ( SearchResult result : results ) { - if ( PhenotypeAssociation.class.equals( resultType ) ) { - entitiesVos.add( ( CharacteristicValueObject ) result.getResultObject() ); - } else if ( resultType.isInstance( result.getResultObject() ) ) { + if ( resultType.isInstance( result.getResultObject() ) ) { entities.add( result.getResultObject() ); } else { entitiesIds.add( result.getResultId() ); @@ -319,8 +338,7 @@ private List>> loadValueObject TypeDescriptor.collection( List.class, TypeDescriptor.valueOf( supportedResultTypes.get( resultType ) ) ) ) ); } - // FIXME: PhenotypeAssociation does not support conversion from IDs, but once it does or if it's removed, - // then we don't need to check isEmpty() + // convert IDs to VOs if ( !entitiesIds.isEmpty() ) { //noinspection unchecked entitiesVos.addAll( ( List> ) @@ -337,13 +355,15 @@ private List>> loadValueObject List>> resultsVo = new ArrayList<>( results.size() ); for ( SearchResult sr : results ) { if ( entityVosById.containsKey( sr.getResultId() ) ) { - resultsVo.add( SearchResult.from( sr, entityVosById.get( sr.getResultId() ) ) ); + IdentifiableValueObject newResultObject = entityVosById.get( sr.getResultId() ); + resultsVo.add( sr.withResultObject( newResultObject ) ); } else if ( sr.getResultObject() == null ) { // result was originally unfilled and nothing was found, so it's somewhat safe to restore it if ( sr.getHighlights() != null ) { resultsVo.add( SearchResult.from( sr.getResultType(), sr.getResultId(), sr.getScore(), sr.getHighlights(), sr.getSource() ) ); } else { - resultsVo.add( SearchResult.from( sr.getResultType(), sr.getResultId(), sr.getScore(), sr.getSource() ) ); + long entityId = sr.getResultId(); + resultsVo.add( 
SearchResult.from( sr.getResultType(), entityId, sr.getScore(), null, sr.getSource() ) ); } } else { // this happens if the VO was filtered out after VO conversion (i.e. via ACL) or uninitialized @@ -361,399 +381,10 @@ private List>> loadValueObject return resultsVo; } - /** - * Checks whether settings have the search genes flag and does the search if needed. - * - * @param results the results to which should any new results be accreted. - */ - private void accreteResultsGenes( LinkedHashSet> results, SearchSettings settings ) throws SearchException { - if ( settings.hasResultType( Gene.class ) ) { - Collection> genes = this.getGenesFromSettings( settings ); - results.addAll( genes ); - } - } - - /** - * Checks settings for all do-search flags, except for gene (see - * {@link #accreteResultsGenes(LinkedHashSet, SearchSettings)}), and does the search if needed. - * - * @param results the results to which should any new results be accreted. - * @param settings search settings - */ - private void accreteResultsOthers( LinkedHashSet> results, SearchSettings settings ) throws SearchException { - - Collection> blacklistedResults = new SearchResultSet<>(); - - if ( settings.hasResultType( ExpressionExperiment.class ) ) { - results.addAll( this.expressionExperimentSearch( settings, blacklistedResults ) ); - } - - Collection> compositeSequences = null; - if ( settings.hasResultType( CompositeSequence.class ) ) { - compositeSequences = this.compositeSequenceSearch( settings ); - results.addAll( compositeSequences ); - } - - if ( settings.hasResultType( ArrayDesign.class ) ) { - results.addAll( this.arrayDesignSearch( settings, compositeSequences, blacklistedResults ) ); - } - - if ( settings.hasResultType( BioSequence.class ) ) { - Collection> genes = this.getGenesFromSettings( settings ); - Collection> bioSequencesAndGenes = this.bioSequenceAndGeneSearch( settings, genes ); - - // split results so that accreteResults can be properly typed - - //noinspection unchecked - 
Collection> bioSequences = bioSequencesAndGenes.stream() - .filter( result -> BioSequence.class.equals( result.getResultType() ) ) - .map( result -> ( SearchResult ) result ) - .collect( Collectors.toSet() ); - results.addAll( bioSequences ); - - //noinspection unchecked - Collection> gen = bioSequencesAndGenes.stream() - .filter( result -> Gene.class.equals( result.getResultType() ) ) - .map( result -> ( SearchResult ) result ) - .collect( Collectors.toSet() ); - results.addAll( gen ); - } - - if ( settings.hasResultType( Gene.class ) && settings.isUseGo() ) { - // FIXME: add support for OR, but there's a bug in baseCode that prevents this https://github.com/PavlidisLab/baseCode/issues/22 - String query = settings.getQuery().replaceAll( "\\s+OR\\s+", "" ); - results.addAll( this.dbHitsToSearchResult( - Gene.class, geneSearchService.getGOGroupGenes( query, settings.getTaxon() ), 0.8, Collections.singletonMap( "GO Group", "From GO group" ), "GeneSearchService.getGOGroupGenes" ) ); - } - - if ( settings.hasResultType( BibliographicReference.class ) ) { - results.addAll( this.searchSource.searchBibliographicReference( settings ) ); - } - - if ( settings.hasResultType( GeneSet.class ) ) { - results.addAll( this.geneSetSearch( settings ) ); - } - - if ( settings.hasResultType( ExpressionExperimentSet.class ) ) { - results.addAll( this.experimentSetSearch( settings ) ); - } - - if ( settings.hasResultType( PhenotypeAssociation.class ) ) { - results.addAll( searchPhenotype( settings ) ); - } - - if ( settings.hasResultType( BlacklistedEntity.class ) ) { - results.addAll( blacklistedResults ); - } - } - - /** - * Find phenotypes. 
- */ - private Collection> searchPhenotype( SearchSettings settings ) throws SearchException { - if ( !settings.isUseDatabase() ) - return Collections.emptySet(); - // FIXME: add support for OR, but there's a bug in baseCode that prevents this https://github.com/PavlidisLab/baseCode/issues/22 - String query = settings.getQuery().replaceAll( "\\s+OR\\s+", "" ); - return this.phenotypeAssociationManagerService.searchInDatabaseForPhenotype( query, settings.getMaxResults() ).stream() - .map( r -> SearchResult.from( PhenotypeAssociation.class, r, 1.0, "PhenotypeAssociationManagerService.searchInDatabaseForPhenotype" ) ) - .collect( Collectors.toCollection( SearchResultSet::new ) ); - } - - // /** - // * Convert biomaterial hits into their associated ExpressionExperiments - // * - // * @param results will go here - // * @param biomaterials - // */ - // private void addEEByBiomaterials( Collection results, Map biomaterials ) { - // if ( biomaterials.size() == 0 ) { - // return; - // } - // Map ees = expressionExperimentService - // .findByBioMaterials( biomaterials.keySet() ); - // for ( ExpressionExperiment ee : ees.keySet() ) { - // SearchResult searchResult = biomaterials.get( ees.get( ee ) ); - // results.add( new SearchResult( ee, searchResult.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, - // searchResult.getHighlightedText() + " (BioMaterial characteristic)" ) ); - // } - // } - // - // /** - // * Convert biomaterial hits into their associated ExpressionExperiments - // * - // * @param results will go here - // * @param biomaterials - // */ - // private void addEEByBiomaterialIds( Collection results, Map biomaterials ) { - // if ( biomaterials.size() == 0 ) { - // return; - // } - // Map ees = expressionExperimentService - // .findByBioMaterialIds( biomaterials.keySet() ); - // for ( ExpressionExperiment ee : ees.keySet() ) { - // SearchResult searchResult = biomaterials.get( ees.get( ee ) ); - // results.add( new SearchResult( ee, searchResult.getScore() 
* SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, - // searchResult.getHighlightedText() + " (BioMaterial characteristic)" ) ); - // } - // } - // - // /** - // * Convert factorValue hits into their associated ExpressionExperiments - // * - // * @param results will go here - // * @param factorValues - // */ - // private void addEEByFactorvalueIds( Collection results, Map factorValues ) { - // if ( factorValues.size() == 0 ) { - // return; - // } - // Map ees = expressionExperimentService - // .findByFactorValueIds( factorValues.keySet() ); - // for ( ExpressionExperiment ee : ees.keySet() ) { - // SearchResult searchResult = factorValues.get( ees.get( ee ) ); - // results.add( new SearchResult( ee, searchResult.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, - // searchResult.getHighlightedText() + " (FactorValue characteristic)" ) ); - // } - // - // } - // - // /** - // * Convert factorValue hits into their associated ExpressionExperiments - // * - // * @param results will go here - // * @param factorValues - // */ - // private void addEEByFactorvalues( Collection results, Map factorValues ) { - // if ( factorValues.size() == 0 ) { - // return; - // } - // Map ees = expressionExperimentService - // .findByFactorValues( factorValues.keySet() ); - // for ( ExpressionExperiment ee : ees.keySet() ) { - // SearchResult searchResult = factorValues.get( ees.get( ee ) ); - // results.add( new SearchResult( ee, searchResult.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, - // searchResult.getHighlightedText() + " (FactorValue characteristic)" ) ); - // } - // - // } - - private void addTerms( Taxon taxon, String taxonName ) { - String[] terms; - if ( StringUtils.isNotBlank( taxonName ) ) { - terms = taxonName.split( "\\s+" ); - // Only continue for multi-word - if ( terms.length > 1 ) { - for ( String s : terms ) { - if ( !nameToTaxonMap.containsKey( s.trim().toLowerCase() ) ) { - nameToTaxonMap.put( s.trim().toLowerCase(), taxon ); - } - } - } - } - } - - 
private Collection> experimentSetSearch( SearchSettings settings ) throws SearchException { - return searchSource.searchExperimentSet( settings ); - } - - /** - * A general search for array designs. - * This search does both an database search and a compass search. This is also contains an underlying - * {@link CompositeSequence} search, returning the {@link ArrayDesign} collection for the given composite sequence - * search string (the returned collection of array designs does not contain duplicates). - * - * @param probeResults Collection of results from a previous CompositeSequence search. Can be null; otherwise used - * to avoid a second search for probes. The array designs for the probes are added to the final - * results. - */ - private Collection> arrayDesignSearch( SearchSettings settings, - @Nullable Collection> probeResults, Collection> blacklistedResults ) throws SearchException { - - StopWatch watch = StopWatch.createStarted(); - String searchString = prepareDatabaseQuery( settings ); - Collection> results = new SearchResultSet<>(); - - ArrayDesign shortNameResult = arrayDesignService.findByShortName( searchString ); - if ( shortNameResult != null ) { - results.add( SearchResult.from( ArrayDesign.class, shortNameResult, DatabaseSearchSource.MATCH_BY_SHORT_NAME_SCORE, "ArrayDesignService.findByShortName" ) ); - return results; - } - - Collection nameResult = arrayDesignService.findByName( searchString ); - if ( nameResult != null && !nameResult.isEmpty() ) { - for ( ArrayDesign ad : nameResult ) { - results.add( SearchResult.from( ArrayDesign.class, ad, DatabaseSearchSource.MATCH_BY_NAME_SCORE, "ArrayDesignService.findByShortName" ) ); - } - return results; - } - - if ( settings.hasResultType( BlacklistedEntity.class ) ) { - BlacklistedEntity b = blacklistedEntityService.findByAccession( searchString ); - if ( b != null ) { - blacklistedResults.add( SearchResult.from( BlacklistedEntity.class, b, DatabaseSearchSource.MATCH_BY_ACCESSION_SCORE, null, 
"BlacklistedEntityService.findByAccession" ) ); - return results; - } - } - - Collection altNameResults = arrayDesignService.findByAlternateName( searchString ); - for ( ArrayDesign arrayDesign : altNameResults ) { - results.add( SearchResult.from( ArrayDesign.class, arrayDesign, 0.9, "ArrayDesignService.findByAlternateName" ) ); - } - - Collection manufacturerResults = arrayDesignService.findByManufacturer( searchString ); - for ( ArrayDesign arrayDesign : manufacturerResults ) { - results.add( SearchResult.from( ArrayDesign.class, arrayDesign, 0.9, "ArrayDesignService.findByManufacturer" ) ); - } - - /* - * FIXME: add merged platforms and subsumers - */ - results.addAll( searchSource.searchArrayDesign( settings ) ); - - watch.stop(); - if ( watch.getTime() > 1000 ) - SearchServiceImpl.log.warn( "Array Design search for " + settings + " took " + watch.getTime() + " ms" ); - - return results; - } - - /** - * @param previousGeneSearchResults Can be null, otherwise used to avoid a second search for genes. The biosequences - * for the genes are added to the final results. - */ - private Collection> bioSequenceAndGeneSearch( SearchSettings settings, - Collection> previousGeneSearchResults ) throws SearchException { - StopWatch watch = StopWatch.createStarted(); - - Collection> searchResults = searchSource.searchBioSequenceAndGene( settings, previousGeneSearchResults ); - - watch.stop(); - if ( watch.getTime() > 1000 ) - SearchServiceImpl.log - .warn( "Biosequence search for " + settings + " took " + watch.getTime() + " ms " + searchResults - .size() + " results." ); - - return searchResults; - } - - /** - * Search via characteristics i.e. ontology terms. - *

- * This is an important type of search but also a point of performance issues. Searches for "specific" terms are - * generally not a big problem (yielding less than 100 results); searches for "broad" terms can return numerous - * (thousands) - * results. - */ - private Collection> characteristicEESearch( final SearchSettings settings ) throws SearchException { - - Collection> results = new SearchResultSet<>(); - - StopWatch watch = StopWatch.createStarted(); - - log.debug( "Starting EE search for " + settings ); - String[] subclauses = prepareDatabaseQuery( settings ).split( "\\s+OR\\s+" ); - for ( String subclause : subclauses ) { - /* - * Note that the AND is applied only within one entity type. The fix would be to apply AND at this - * level. - */ - Collection> classResults = this - .characteristicEESearchWithChildren( settings.withQuery( subclause ) ); - if ( classResults.size() > 0 ) { - log.debug( "... Found " + classResults.size() + " EEs matching " + subclause ); - } - results.addAll( classResults ); - } - - SearchServiceImpl.log.debug( String.format( "ExpressionExperiment search: %s -> %d characteristic-based hits %d ms", - settings, results.size(), watch.getTime() ) ); - - return results; - - } - - /** - * Search for the Experiment query in ontologies, including items that are associated with children of matching - * query terms. That is, 'brain' should return entities tagged as 'hippocampus'. It can handle AND in searches, so - * Parkinson's - * AND neuron finds items tagged with both of those terms. The use of OR is handled by the caller. - * - * @param settings search settings - * @return SearchResults of Experiments - */ - private Collection> characteristicEESearchWithChildren( SearchSettings settings ) throws SearchException { - StopWatch watch = StopWatch.createStarted(); - - /* - * The tricky part here is if the user has entered a boolean query. 
If they put in Parkinson's disease AND - * neuron, then we want to eventually return entities that are associated with both. We don't expect to find - * single characteristics that match both. - * - * But if they put in Parkinson's disease we don't want to do two queries. - */ - String[] subparts = settings.getQuery().split( " AND " ); - - // we would have to first deal with the separate queries, and then apply the logic. - Collection> allResults = new SearchResultSet<>(); - - SearchServiceImpl.log.debug( "Starting characteristic search for: " + settings ); - for ( String rawTerm : subparts ) { - String trimmed = StringUtils.strip( rawTerm ); - if ( StringUtils.isBlank( trimmed ) ) { - continue; - } - Collection> subqueryResults = ontologySearchSource.searchExpressionExperiment( settings.withQuery( trimmed ) ); - if ( allResults.isEmpty() ) { - allResults.addAll( subqueryResults ); - } else { - // this is our Intersection operation. - allResults.retainAll( subqueryResults ); - - // aggregate the highlighted text. - Map, String> highlights = new HashMap<>(); - for ( SearchResult sqr : subqueryResults ) { - if ( sqr.getHighlights() != null && sqr.getHighlights().containsKey( "term" ) ) { - highlights.put( sqr, sqr.getHighlights().get( "term" ) ); - } - } - - for ( SearchResult ar : allResults ) { - String k = highlights.get( ar ); - if ( StringUtils.isNotBlank( k ) ) { - if ( ar.getHighlights() != null ) { - if ( StringUtils.isBlank( ar.getHighlights().get( "term" ) ) ) { - ar.getHighlights().put( "term", k ); - } else { - ar.getHighlights().compute( "term", ( z, t ) -> t + "
" + k ); - } - } else { - ar.setHighlights( Collections.singletonMap( "term", k ) ); - } - } - } - } - - if ( watch.getTime() > 1000 ) { - SearchServiceImpl.log.warn( "Characteristic EE search for '" + rawTerm + "': " + allResults.size() - + " hits retained so far; " + watch.getTime() + "ms" ); - watch.reset(); - watch.start(); - } - - if ( isFilled( allResults, settings ) ) { - return allResults; - } - } - - return allResults; - - } - /** * Search by name of the composite sequence as well as gene. */ - private Collection> compositeSequenceSearch( SearchSettings settings ) throws SearchException { + private SearchResultSet compositeSequenceSearch( SearchSettings settings ) throws SearchException { StopWatch watch = StopWatch.createStarted(); @@ -763,12 +394,12 @@ private Collection> compositeSequenceSearch( Sea */ // Skip compass searching of composite sequences because it only bloats the results. - Collection> compositeSequenceResults = new HashSet<>( this.searchSource.searchCompositeSequenceAndGene( settings ) ); + Collection> compositeSequenceResults = this.searchSource.searchCompositeSequenceAndGene( settings ); /* * This last step is needed because the compassSearch for compositeSequences returns bioSequences too. */ - Collection> finalResults = new SearchResultSet<>(); + SearchResultSet finalResults = new SearchResultSet<>( settings ); for ( SearchResult sr : compositeSequenceResults ) { if ( CompositeSequence.class.equals( sr.getResultType() ) ) { //noinspection unchecked @@ -777,93 +408,19 @@ private Collection> compositeSequenceSearch( Sea } watch.stop(); - if ( watch.getTime() > 1000 ) - SearchServiceImpl.log - .warn( "Composite sequence search for " + settings + " took " + watch.getTime() + " ms, " - + finalResults.size() + " results." 
); + if ( watch.getTime() > 1000 ) { + SearchServiceImpl.log.warn( String.format( "Composite sequence search for %s took %d ms, %d results.", + settings, watch.getTime(), finalResults.size() ) ); + } return finalResults; } - // private List convertEntitySearchResutsToValueObjectsSearchResults( - // Collection searchResults ) { - // List convertedSearchResults = new ArrayList<>(); - // StopWatch t = this.startTiming(); - // for ( SearchResult searchResult : searchResults ) { - // // this is a special case ... for some reason. - // if ( BioSequence.class.equals( searchResult.getResultClass() ) ) { - // SearchResult convertedSearchResult = new SearchResult( BioSequenceValueObject - // .fromEntity( bioSequenceService.thaw( ( BioSequence ) searchResult.getResultObject() ) ), - // searchResult.getScore(), searchResult.getHighlightedText() ); - // convertedSearchResults.add( convertedSearchResult ); - // } else { - // convertedSearchResults.add( searchResult ); - // } - // } - // if ( t.getTime() > 500 ) { - // log.info( "Conversion of " + searchResults.size() + " search results: " + t.getTime() + "ms" ); - // } - // return convertedSearchResults; - // } - - // /** - // * Takes a list of ontology terms, and classes of objects of interest to be returned. Looks through the - // * characteristic table for an exact match with the given ontology terms. Only tries to match the uri's. - // * - // * @param classes Class of objects to restrict the search to (typically ExpressionExperiment.class, for - // * example). 
- // * @param terms A list of ontology terms to search for - // * @return Collection of search results for the objects owning the found characteristics, where the owner is - // * of - // * class clazz - // */ - // private Collection databaseCharacteristicExactUriSearchForOwners( Collection> classes, - // Collection terms ) { - // - // // Collection characteristicValueMatches = new ArrayList(); - // Collection characteristicURIMatches = new ArrayList<>(); - // - // for ( OntologyTerm term : terms ) { - // // characteristicValueMatches.addAll( characteristicService.findByValue( term.getUri() )); - // characteristicURIMatches.addAll( characteristicService.findByUri( classes, term.getUri() ) ); - // } - // - // Map parentMap = characteristicService.getParents( classes, characteristicURIMatches ); - // // parentMap.putAll( characteristicService.getParents(characteristicValueMatches ) ); - // - // return this.filterCharacteristicOwnersByClass( classes, parentMap ); - // } - - // /** - // * Convert characteristic hits from database searches into SearchResults. - // * @param entities map of classes to characteristics e.g. Experiment.class -> annotated characteristics - // * @param matchText used in highlighting - // * - // * FIXME we need the ID of the annotated object if we do it this way - // */ - // private Collection dbCharacteristicHitsToSearchResultByClass( Map, Collection> entities, - // String matchText ) { - // // return this.dbHitsToSearchResult( entities, null, matchText ); - // - // List results = new ArrayList<>(); - // for ( Class clazz : entities.keySet() ) { - // - // for ( Characteristic c : entities.get( clazz ) ) { - // SearchResult esr = new SearchResult(clazz, /*ID NEEDED*/ , 1.0, matchText ); - // - // results.add( esr ); - // } - // - // } - // return results; - // - // } - /** * Convert hits from database searches into SearchResults. 
*/ - private Collection> dbHitsToSearchResult( Class entityClass, Collection entities, double score, Map highlights, String source ) { + private SearchResultSet dbHitsToSearchResult( SearchSettings settings, Class entityClass, Collection entities, double score, Map highlights, String source ) { StopWatch watch = StopWatch.createStarted(); - List> results = new ArrayList<>( entities.size() ); + SearchResultSet results = new SearchResultSet<>( settings, entities.size() ); for ( T e : entities ) { if ( e == null ) { if ( log.isDebugEnabled() ) @@ -881,17 +438,6 @@ private Collection> dbHitsToSearchResul return results; } - // private void debugParentFetch( Map parentMap ) { - // /* - // * This is purely debugging. - // */ - // if ( parentMap.size() > 0 ) { - // if ( SearchServiceImpl.log.isDebugEnabled() ) - // SearchServiceImpl.log.debug( "Found " + parentMap.size() + " owners for " + parentMap.keySet().size() - // + " characteristics:" ); - // } - // } - /** * A key method for experiment search. This search does both an database search and a compass search, and looks at * several different associations. To allow maximum flexibility, we try not to limit the number of results here (it @@ -904,75 +450,45 @@ private Collection> dbHitsToSearchResul * SearchSettings.DEFAULT_MAX_RESULTS_PER_RESULT_TYPE * @return {@link Collection} of SearchResults */ - private Collection> expressionExperimentSearch( final SearchSettings settings, Collection> blacklistedResults ) throws SearchException { + private SearchResultSet expressionExperimentSearch( final SearchSettings settings ) throws SearchException { StopWatch totalTime = StopWatch.createStarted(); StopWatch watch = StopWatch.createStarted(); SearchServiceImpl.log.debug( ">>>>> Starting search for " + settings ); - Set> results = new SearchResultSet(); + SearchResultSet results = new SearchResultSet<>( settings ); // searches for GEO names, etc - "exact" matches. 
- if ( settings.isUseDatabase() ) { - results.addAll( this.searchSource.searchExpressionExperiment( settings ) ); - if ( watch.getTime() > 500 ) - SearchServiceImpl.log - .info( "Expression Experiment database search for " + settings + " took " + watch.getTime() - + " ms, " + results.size() + " hits." ); - - /* - * If we get results here, probably we want to just stop immediately, because the user is searching for - * something exact. In response to https://github.com/PavlidisLab/Gemma/issues/140 we continue if the user - * has admin status. - */ - if ( !results.isEmpty() && !SecurityUtil.isUserAdmin() ) { - return results; - } - - if ( settings.hasResultType( BlacklistedEntity.class ) ) { - BlacklistedEntity b = blacklistedEntityService.findByAccession( prepareDatabaseQuery( settings ) ); - if ( b != null ) { - blacklistedResults.add( SearchResult.from( BlacklistedEntity.class, b, DatabaseSearchSource.MATCH_BY_ACCESSION_SCORE, null, "BlacklistedEntityService.findByAccession" ) ); - return results; - } - } + results.addAll( searchSource.searchExpressionExperiment( settings ) ); + if ( watch.getTime() > 1000 ) + SearchServiceImpl.log.warn( String.format( "Expression Experiment database search for %s took %d ms, %d hits.", + settings, watch.getTime(), results.size() ) ); - watch.reset(); - watch.start(); + // in fast mode, stop now + if ( settings.getMode().equals( SearchSettings.SearchMode.FAST ) ) { + return results; } + /* + * If we get results here, probably we want to just stop immediately, because the user is searching for + * something exact. In response to https://github.com/PavlidisLab/Gemma/issues/140 we continue if the user + * has admin status. + */ + // special case: search for experiments associated with genes - Collection> geneHits = this.geneSearch( settings.withMode( SearchSettings.SearchMode.FAST ) ); - if ( geneHits.size() > 0 ) { - // TODO: make sure this is being hit correctly. 
+ // this is achieved by crafting a URI with the NCBI gene id + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) || SecurityUtil.isUserAdmin() ) { + SearchResultSet geneHits = this.geneSearch( settings.withMode( SearchSettings.SearchMode.FAST ) ); for ( SearchResult gh : geneHits ) { Gene g = gh.getResultObject(); - if ( g == null ) { + if ( g == null || g.getNcbiGeneId() == null ) { continue; } - Integer ncbiGeneId = g.getNcbiGeneId(); - String geneUri = "http://" + NCBI_GENE + "/" + ncbiGeneId; // this is just enough to fool the search into looking by NCBI ID, but check working as expected - SearchSettings gss = SearchSettings.expressionExperimentSearch( geneUri ); - gss.setMaxResults( settings.getMaxResults() ); - gss.setTaxon( settings.getTaxon() ); - gss.setQuery( geneUri ); - // FIXME: there should be a nicer, typed way of doing ontology searches - results.addAll( ontologyUriSearch( gss ).getByResultObjectType( ExpressionExperiment.class ) ); + results.addAll( ontologySearchSource.searchExpressionExperiment( settings.withQuery( NCBI_GENE_ID_URI_PREFIX + g.getNcbiGeneId() ) ) ); } } - // fancy search that uses ontologies to infer related terms - if ( settings.isUseCharacteristics() ) { - results.addAll( this.characteristicEESearch( settings ) ); - if ( watch.getTime() > 500 ) - SearchServiceImpl.log - .warn( String.format( "Expression Experiment search via characteristics for %s took %d ms, %d hits.", - settings, watch.getTime(), results.size() ) ); - watch.reset(); - watch.start(); - } - /* * this should be unnecessary we we hit bibrefs in our regular lucene-index search. 
Also as written, this is * very slow @@ -1013,139 +529,55 @@ private Collection> expressionExperimentSearc * we may want to move this sooner, but we don't want to slow down the process if they are not searching by * array design */ - if ( results.isEmpty() ) { + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) || SecurityUtil.isUserAdmin() ) { watch.reset(); watch.start(); - Collection> matchingPlatforms = this.arrayDesignSearch( settings, null, blacklistedResults ); + Collection> matchingPlatforms = searchSource.searchArrayDesign( settings ); for ( SearchResult adRes : matchingPlatforms ) { ArrayDesign ad = adRes.getResultObject(); if ( ad != null ) { Collection expressionExperiments = this.arrayDesignService .getExpressionExperiments( ad ); - if ( expressionExperiments.size() > 0 ) - results.addAll( this.dbHitsToSearchResult( ExpressionExperiment.class, expressionExperiments, + if ( !expressionExperiments.isEmpty() ) + results.addAll( this.dbHitsToSearchResult( settings, ExpressionExperiment.class, expressionExperiments, 0.8, Collections.singletonMap( "arrayDesign", ad.getShortName() + " - " + ad.getName() ), String.format( "ArrayDesignService.getExpressionExperiments(%s)", ad ) ) ); } } - if ( watch.getTime() > 500 ) + if ( watch.getTime() > 1000 ) { SearchServiceImpl.log.warn( String.format( "Expression Experiment platform search for %s took %d ms, %d hits.", settings, watch.getTime(), results.size() ) ); - - if ( !results.isEmpty() ) { - return results; } } - if ( !settings.isFillResults() ) { - results.forEach( sr -> sr.setResultObject( null ) ); - } - String message = String.format( ">>>>>>> Expression Experiment search for %s took %d ms, %d hits.", settings, totalTime.getTime(), results.size() ); - if ( totalTime.getTime() > 500 ) { + if ( totalTime.getTime() > 1000 ) { SearchServiceImpl.log.warn( message ); } else { SearchServiceImpl.log.debug( message ); } return results; - - } - - // /** - // * - // * @param 
classes - // * @param characteristic2entity - // * @return - // */ - // private Collection filterCharacteristicOwnersByClass( Map, Collection> parents, String uri, String value ) { - // - // StopWatch t = this.startTiming(); - // Map biomaterials = new HashMap<>(); - // Map factorValues = new HashMap<>(); - // Collection results = new HashSet<>(); - // - // for ( Class clazz : parents.keySet() ) { - // for ( Long id : parents.get( clazz ) ) { - // String matchedText; - // - // if ( StringUtils.isNotBlank( uri ) ) { - // matchedText = "Tagged term: " + value + ""; - // } else { - // matchedText = "Free text: " + value; - // } - // - // if ( clazz.isAssignableFrom( BioMaterial.class ) ) { - // biomaterials.put( id, new SearchResult( clazz, id, 1.0, matchedText ) ); - // } else if ( clazz.isAssignableFrom( FactorValue.class ) ) { - // factorValues.put( id, new SearchResult( clazz, id, 1.0, matchedText ) ); - // } else if ( clazz.isAssignableFrom( ExpressionExperiment.class ) ) { - // results.add( new SearchResult( clazz, id, 1.0, matchedText ) ); - // } else { - // throw new IllegalStateException(); - // } - // } - // - // } - // - // this.addEEByFactorvalueIds( results, factorValues ); - // - // this.addEEByBiomaterialIds( results, biomaterials ); - // - // if ( t.getTime() > 500 ) { - // log.info( "Retrieving experiments associated with characteristics: " + t.getTime() + "ms" ); - // } - // - // return results; - // - // } - - /** - * Makes no attempt at resolving the search query as a URI. Will tokenize the search query if there are control - * characters in the String. URI's will get parsed into multiple query terms and lead to bad results. - *

- * Will try to resolve general terms like brain --> to appropriate OntologyTerms and search for objects tagged with - * those terms (if isUseCharacte = true) - */ - private SearchResultMapImpl generalSearch( SearchSettings settings ) throws SearchException { - // If nothing to search return nothing. - if ( StringUtils.isBlank( settings.getQuery() ) ) { - return new SearchResultMapImpl(); - } - - // attempt to infer a taxon from the query if missing - if ( settings.getTaxon() == null ) { - settings.setTaxon( inferTaxon( settings ) ); - } - - LinkedHashSet> rawResults = new LinkedHashSet<>(); - - // do gene first before we munge the query too much. - this.accreteResultsGenes( rawResults, settings ); - - this.accreteResultsOthers( - rawResults, - settings ); - - return groupAndSortResultsByType( rawResults, settings ); } /** * Combines compass style search, the db style search, and the compositeSequence search and returns 1 combined list * with no duplicates. */ - private Collection> geneSearch( final SearchSettings settings ) throws SearchException { + private SearchResultSet geneSearch( final SearchSettings settings ) throws SearchException { StopWatch watch = StopWatch.createStarted(); - Collection> geneDbList = this.searchSource.searchGene( settings ); + SearchResultSet combinedGeneList = new SearchResultSet<>( settings ); - if ( settings.getMode() == SearchSettings.SearchMode.FAST && geneDbList.size() > 0 ) { - return geneDbList; - } + combinedGeneList.addAll( this.searchSource.searchGene( settings ) ); - Set> combinedGeneList = new HashSet<>( geneDbList ); + // stop here in the fast search mode + if ( settings.getMode() == SearchSettings.SearchMode.FAST ) { + return combinedGeneList; + } - if ( combinedGeneList.isEmpty() ) { + // expand the search by including probes-associated genes + if ( combinedGeneList.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { Collection> geneCsList = this.searchSource.searchCompositeSequenceAndGene( 
settings ); for ( SearchResult res : geneCsList ) { if ( Gene.class.equals( res.getResultType() ) ) @@ -1154,81 +586,12 @@ private Collection> geneSearch( final SearchSettings settings } } - if ( watch.getTime() > 1000 ) - SearchServiceImpl.log - .warn( "Gene search for " + settings + " took " + watch.getTime() + " ms; " + combinedGeneList - .size() + " results." ); - - return combinedGeneList; - } - - private Collection> geneSetSearch( SearchSettings settings ) throws SearchException { - return searchSource.searchGeneSet( settings ); - } - - // /** - // * Given classes to search and characteristics (experiment search) - // * - // * @param classes Which classes of entities to look for - // */ - // private Collection getAnnotatedEntities( Collection> classes, - // Collection cs ) { - // - // // time-critical - // Map characteristic2entity = characteristicService.getParents( classes, cs ); - // Collection matchedEntities = this - // .filterCharacteristicOwnersByClass( classes, characteristic2entity ); - // - // if ( SearchServiceImpl.log.isDebugEnabled() ) { - // this.debugParentFetch( characteristic2entity ); - // } - // return matchedEntities; - // } - - /** - * @return a collection of SearchResults holding all the genes resulting from the search with given SearchSettings. - */ - private Collection> getGenesFromSettings( SearchSettings settings ) throws SearchException { - Collection> genes = null; - if ( settings.hasResultType( Gene.class ) ) { - genes = this.geneSearch( settings ); - } - return genes; - } - - // /** - // * @return List of ids for the entities held by the search results. - // */ - // private List getIds( List searchResults ) { - // List list = new ArrayList<>(); - // for ( SearchResult r : searchResults ) { - // list.add( r.getId() ); - // } - // assert list.size() == searchResults.size(); - // return list; - // } - - /** - * Group and sort results by type. - * - * @return map of result entity class (e.g. 
BioSequence or ExpressionExperiment) to SearchResult - * @see SearchResult#getResultType() - */ - private static SearchResultMapImpl groupAndSortResultsByType( - LinkedHashSet> rawResults, - SearchSettings settings ) { - - SearchResultMapImpl results = new SearchResultMapImpl(); - List> sortedRawResults = rawResults.stream().sorted().collect( Collectors.toList() ); - - // Get the top N results for each class. - for ( SearchResult sr : sortedRawResults ) { - if ( settings.getMaxResults() < 1 || results.size() < settings.getMaxResults() ) { - results.add( sr ); - } + if ( watch.getTime() > 1000 ) { + SearchServiceImpl.log.warn( String.format( "Gene search for %s took %d ms; %d results.", + settings, watch.getTime(), combinedGeneList.size() ) ); } - return results; + return combinedGeneList; } private void initializeNameToTaxonMap() { @@ -1252,130 +615,29 @@ private void initializeNameToTaxonMap() { } - /** - * @return results, if the settings.termUri is populated. This includes gene uris. - */ - private SearchResultMapImpl ontologyUriSearch( SearchSettings settings ) throws SearchException { - SearchResultMapImpl results = new SearchResultMapImpl(); - - // 1st check to see if the query is a URI (from an ontology). - // Do this by seeing if we can find it in the loaded ontologies. - // Escape with general utilities because might not be doing a lucene backed search. (just a hibernate one). - String termUri = settings.getQuery(); - - if ( !settings.isTermQuery() ) { - return results; - } - - String uriString = StringEscapeUtils.escapeJava( StringUtils.strip( termUri ) ); - - /* - * Gene search. We want experiments that are annotated. But also genes. - */ - if ( StringUtils.containsIgnoreCase( uriString, SearchServiceImpl.NCBI_GENE ) ) { - // Perhaps is a valid gene URL. Want to search for the gene in gemma. 
- - // Get the gene - String ncbiAccessionFromUri = StringUtils.substringAfterLast( uriString, "/" ); - Gene g = null; - - try { - g = geneService.findByNCBIId( Integer.parseInt( ncbiAccessionFromUri ) ); - } catch ( NumberFormatException e ) { - // ok - } - if ( g != null ) { - - // 1st get objects tagged with the given gene identifier - if ( settings.hasResultType( ExpressionExperiment.class ) ) { // FIXME maybe we always want this? - Collection> eeHits = ontologySearchSource.searchExpressionExperiment( settings.withQuery( termUri ) ); - for ( SearchResult sr : eeHits ) { - Map highlights; - if ( sr.getHighlights() != null ) { - highlights = new HashMap<>( sr.getHighlights() ); - } else { - highlights = new HashMap<>(); - } - highlights.put( "term", g.getOfficialSymbol() ); - sr.setHighlights( highlights ); + private void addTerms( Taxon taxon, String taxonName ) { + String[] terms; + if ( StringUtils.isNotBlank( taxonName ) ) { + terms = taxonName.split( "\\s+" ); + // Only continue for multi-word + if ( terms.length > 1 ) { + for ( String s : terms ) { + if ( !nameToTaxonMap.containsKey( s.trim().toLowerCase() ) ) { + nameToTaxonMap.put( s.trim().toLowerCase(), taxon ); } - results.addAll( eeHits ); - } - - //// - if ( settings.hasResultType( Gene.class ) ) { - results.add( SearchResult.from( Gene.class, g, DatabaseSearchSource.MATCH_BY_ID_SCORE, "GeneService.findByNCBIId" ) ); - } } - return results; - } - - /* - * Not searching for a gene. Only other option is a direct URI search for experiments. - */ - if ( settings.hasResultType( ExpressionExperiment.class ) ) { - results.addAll( ontologySearchSource.searchExpressionExperiment( settings.withQuery( uriString ) ) ); } - - return results; } - // /** - // * Retrieve entities from the persistent store (if we don't have them already) - // */ - // private Collection retrieveResultEntities( Class entityClass, List results ) { - // List ids = this.getIds( results ); - // - // // FIXME: don't we want value objects? 
- // if ( ExpressionExperiment.class.isAssignableFrom( entityClass ) ) { - // return expressionExperimentService.load( ids ); - // } else if ( ArrayDesign.class.isAssignableFrom( entityClass ) ) { - // return arrayDesignService.load( ids ); - // } else if ( CompositeSequence.class.isAssignableFrom( entityClass ) ) { - // return compositeSequenceService.load( ids ); - // } else if ( BibliographicReference.class.isAssignableFrom( entityClass ) ) { - // return bibliographicReferenceService.load( ids ); - // } else if ( Gene.class.isAssignableFrom( entityClass ) ) { - // return geneService.load( ids ); - // } else if ( BioSequence.class.isAssignableFrom( entityClass ) ) { - // return bioSequenceService.load( ids ); - // } else if ( GeneSet.class.isAssignableFrom( entityClass ) ) { - // return geneSetService.load( ids ); - // } else if ( ExpressionExperimentSet.class.isAssignableFrom( entityClass ) ) { - // return experimentSetService.load( ids ); - // } else if ( Characteristic.class.isAssignableFrom( entityClass ) ) { - // Collection chars = new ArrayList<>(); - // for ( Long id : ids ) { - // chars.add( characteristicService.load( id ) ); - // } - // return chars; - // } else if ( CharacteristicValueObject.class.isAssignableFrom( entityClass ) ) { - // // TEMP HACK this whole method should not be needed in many cases - // Collection chars = new ArrayList<>(); - // for ( SearchResult result : results ) { - // if ( result.getResultClass().isAssignableFrom( CharacteristicValueObject.class ) ) { - // chars.add( ( CharacteristicValueObject ) result.getResultObject() ); - // } - // } - // return chars; - // } else if ( ExpressionExperimentSet.class.isAssignableFrom( entityClass ) ) { - // return experimentSetService.load( ids ); - // } else if ( BlacklistedEntity.class.isAssignableFrom( entityClass ) ) { - // return blackListDao.load( ids ); - // } else { - // throw new UnsupportedOperationException( "Don't know how to retrieve objects for class=" + entityClass ); - // } - 
// } - - /** * Infer a {@link Taxon} from the search settings. */ - private Taxon inferTaxon( SearchSettings settings ) { + @Nullable + private Taxon inferTaxon( SearchSettings settings ) throws SearchException { // split the query around whitespace characters, limit the splitting to 4 terms (may be excessive) // remove quotes and other characters tha can interfere with the exact match - String[] searchTerms = prepareDatabaseQuery( settings ).split( "\\s+", 4 ); + Set searchTerms = extractTerms( settings ); for ( String term : searchTerms ) { if ( nameToTaxonMap.containsKey( term ) ) { @@ -1386,13 +648,4 @@ private Taxon inferTaxon( SearchSettings settings ) { // no match found, on taxon is inferred return null; } - - /** - * Check if a collection of search results is already filled. - * - * @return true if the search results are filled and cannot accept more results, false otherwise - */ - private static boolean isFilled( Collection> results, SearchSettings settings ) { - return settings.getMaxResults() > 0 && results.size() >= settings.getMaxResults(); - } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchSource.java index ae61a1a0b5..82762b1633 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchSource.java @@ -3,6 +3,7 @@ import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.search.SearchSettings; +import ubic.gemma.model.expression.BlacklistedEntity; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; @@ -21,6 +22,11 @@ */ public interface SearchSource { + /** + * Indicate if this source accepts the given search settings. 
+ */ + boolean accepts( SearchSettings settings ); + default Collection> searchArrayDesign( SearchSettings settings ) throws SearchException { return Collections.emptyList(); } @@ -48,7 +54,10 @@ default Collection> searchBioSequence( SearchSettings @Deprecated default Collection> searchBioSequenceAndGene( SearchSettings settings, @Nullable Collection> previousGeneSearchResults ) throws SearchException { - return Collections.emptyList(); + Collection> results = new HashSet<>(); + results.addAll( this.searchBioSequence( settings ) ); + results.addAll( this.searchGene( settings ) ); + return results; } default Collection> searchCompositeSequence( SearchSettings settings ) throws SearchException { @@ -66,7 +75,7 @@ default Collection> searchCompositeSequence( Sea @Deprecated default Collection> searchCompositeSequenceAndGene( SearchSettings settings ) throws SearchException { Collection> results = new HashSet<>(); - results.addAll( this.searchBioSequence( settings ) ); + results.addAll( this.searchCompositeSequence( settings ) ); results.addAll( this.searchGene( settings ) ); return results; } @@ -82,4 +91,8 @@ default Collection> searchGene( SearchSettings settings ) thr default Collection> searchGeneSet( SearchSettings settings ) throws SearchException { return Collections.emptyList(); } + + default Collection> searchBlacklistedEntities( SearchSettings settings ) throws SearchException { + return Collections.emptyList(); + } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneHighlighter.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneHighlighter.java new file mode 100644 index 0000000000..36641a53f4 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneHighlighter.java @@ -0,0 +1,24 @@ +package ubic.gemma.core.search.lucene; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.highlight.Formatter; +import 
ubic.gemma.core.search.Highlighter; + +import java.util.Map; + +/** + * Highlighter with additional capabilities for Lucene. + */ +public interface LuceneHighlighter extends Highlighter { + + /** + * Obtain a formatter for highlights. + */ + Formatter getFormatter(); + + /** + * Highlight a given Lucene document. + */ + Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java new file mode 100644 index 0000000000..ff6c39a5b8 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java @@ -0,0 +1,18 @@ +package ubic.gemma.core.search.lucene; + +import org.apache.lucene.queryParser.ParseException; +import ubic.gemma.core.search.ParseSearchException; + +/** + * @author poirigui + */ +public class LuceneParseSearchException extends ParseSearchException { + + public LuceneParseSearchException( String query, String message, ParseException cause ) { + super( message, cause ); + } + + public LuceneParseSearchException( String query, String message, ParseException cause, LuceneParseSearchException originalParseException ) { + super( message, cause, originalParseException ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java new file mode 100644 index 0000000000..f7c9e3112f --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java @@ -0,0 +1,283 @@ +package ubic.gemma.core.search.lucene; + +import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.lang3.exception.ExceptionUtils; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.ParseException; +import 
org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.*; +import org.apache.lucene.util.Version; +import org.hibernate.search.util.impl.PassThroughAnalyzer; +import ubic.gemma.core.search.SearchException; +import ubic.gemma.model.common.search.SearchSettings; + +import javax.annotation.Nullable; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import java.util.regex.Pattern; + +/** + * Utilities for parsing search queries using Lucene. + * @author poirigui + */ +@CommonsLog +public class LuceneQueryUtils { + + private static final Pattern LUCENE_RESERVED_CHARS = Pattern.compile( "[+\\-&|!(){}\\[\\]^\"~*?:\\\\]" ); + + private static QueryParser createQueryParser() { + return new QueryParser( Version.LUCENE_36, "", new PassThroughAnalyzer( Version.LUCENE_36 ) ); + } + + /** + * Safely parse the given search settings into a Lucene query, falling back on a query with special characters + * escaped if necessary. + */ + public static Query parseSafely( SearchSettings settings, QueryParser queryParser ) throws SearchException { + String query = settings.getQuery(); + try { + return queryParser.parse( query ); + } catch ( ParseException e ) { + String strippedQuery = LUCENE_RESERVED_CHARS.matcher( settings.getQuery() ).replaceAll( "\\\\$0" ); + log.debug( String.format( "Failed to parse '%s': %s.", query, ExceptionUtils.getRootCauseMessage( e ) ), e ); + try { + return queryParser.parse( strippedQuery ); + } catch ( ParseException e2 ) { + throw new LuceneParseSearchException( + strippedQuery, + ExceptionUtils.getRootCauseMessage( e2 ), + e2, + new LuceneParseSearchException( query, ExceptionUtils.getRootCauseMessage( e ), e ) ); + } + } + } + + /** + * Extract terms, regardless of their logical organization. + *

+ * Prohibited terms are excluded. + */ + public static Set extractTerms( SearchSettings settings ) throws SearchException { + Set terms = new HashSet<>(); + extractTerms( parseSafely( settings, createQueryParser() ), terms ); + return terms; + } + + private static void extractTerms( Query query, Set terms ) { + if ( query instanceof BooleanQuery ) { + for ( BooleanClause clause : ( ( BooleanQuery ) query ) ) { + if ( !clause.isProhibited() ) { + extractTerms( clause.getQuery(), terms ); + } + } + } else if ( query instanceof TermQuery && isTermGlobal( ( ( TermQuery ) query ).getTerm() ) ) { + terms.add( termToString( ( ( TermQuery ) query ).getTerm() ) ); + } + } + + /** + * Extract a DNF (Disjunctive Normal Form) from the terms of a query. + *

+ * Clauses can be nested (i.e. {@code a OR (d OR (c AND (d AND e))}) as long as {@code OR} and {@code AND} are not + * interleaved. + *

+ * Prohibited clauses are ignored unless they break the DNF structure, in which case this will return an empty set. + */ + public static Set> extractTermsDnf( SearchSettings settings ) throws SearchException { + Query q = parseSafely( settings, createQueryParser() ); + Set> result; + if ( q instanceof BooleanQuery ) { + Set> ds = new HashSet<>(); + if ( extractNestedDisjunctions( ( BooleanQuery ) q, ds ) ) { + result = ds; + } else { + result = Collections.emptySet(); + } + } else if ( q instanceof TermQuery && isTermGlobal( ( ( TermQuery ) q ).getTerm() ) ) { + result = Collections.singleton( Collections.singleton( termToString( ( ( TermQuery ) q ).getTerm() ) ) ); + } else { + result = Collections.emptySet(); + } + return result; + } + + private static boolean extractNestedDisjunctions( BooleanQuery query, Set> terms ) { + if ( query.clauses().stream().anyMatch( BooleanClause::isRequired ) ) { + Set subClause = new HashSet<>(); + terms.add( subClause ); + return extractNestedConjunctions( query, subClause ); + } + // at this point, all clauses are optional + for ( BooleanClause clause : query.clauses() ) { + if ( clause.isProhibited() ) { + continue; + } + assert !clause.isRequired(); + if ( clause.getQuery() instanceof BooleanQuery ) { + if ( !extractNestedDisjunctions( ( BooleanQuery ) clause.getQuery(), terms ) ) { + return false; + } + } else if ( clause.getQuery() instanceof TermQuery && isTermGlobal( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ) { + terms.add( Collections.singleton( termToString( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ) ); + } + } + return true; + } + + /** + * Extract nested conjunctions from a query and populate their terms in the given set. 
+ * + * @return true if all the clauses in the query are conjunctions + */ + private static boolean extractNestedConjunctions( BooleanQuery query, Set terms ) { + if ( !query.clauses().stream().allMatch( c -> c.isRequired() || c.isProhibited() ) ) { + // found a disjunction, this is not a valid nested conjunction + return false; + } + // at this point, all the clauses are required + for ( BooleanClause clause : query.clauses() ) { + if ( clause.isProhibited() ) { + continue; + } + if ( clause.getQuery() instanceof BooleanQuery ) { + if ( !extractNestedConjunctions( ( BooleanQuery ) clause.getQuery(), terms ) ) { + return false; + } + } else if ( clause.getQuery() instanceof TermQuery && isTermGlobal( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ) { + terms.add( termToString( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ); + } + } + return true; + } + + /** + * Escape the query for a database match. + * @see #prepareDatabaseQuery(SearchSettings, boolean) + */ + @Nullable + public static String prepareDatabaseQuery( SearchSettings settings ) throws SearchException { + return prepareDatabaseQuery( settings, false ); + } + + /** + * Obtain a query suitable for a database match. + *

+ * This method will return the first global term in the query that is not prohibited. If {@code allowWildcards} is + * set to true, prefix and wildcard terms will be considered as well. + *

+ * The resulting string is free from character that would usually be used for a free-text match unless + * {@code allowWildcards} is set to true. + *

+ * @param allowWildcards if true, wildcards are supported (i.e. '*' and '?') and translated to their corresponding + * LIKE SQL syntax (i.e. '%' and '_'), all other special characters are escaped. + * @return the first suitable term in the query, or null if none of them are applicable for a database query + */ + @Nullable + public static String prepareDatabaseQuery( SearchSettings settings, boolean allowWildcards ) throws SearchException { + return prepareDatabaseQueryInternal( parseSafely( settings, createQueryParser() ), allowWildcards ); + } + + @Nullable + private static String prepareDatabaseQueryInternal( Query query, boolean allowWildcards ) { + if ( query instanceof BooleanQuery ) { + // pick the first, non-prohibited term + for ( BooleanClause c : ( BooleanQuery ) query ) { + if ( !c.isProhibited() ) { + return prepareDatabaseQueryInternal( c.getQuery(), allowWildcards ); + } + } + } else if ( allowWildcards && query instanceof WildcardQuery && isTermGlobal( ( ( WildcardQuery ) query ).getTerm() ) ) { + return escapeLike( termToString( ( ( WildcardQuery ) query ).getTerm() ) ) + .replace( '?', '_' ) + .replace( '*', '%' ); + } else if ( allowWildcards && query instanceof PrefixQuery && isTermGlobal( ( ( PrefixQuery ) query ).getPrefix() ) ) { + return escapeLike( termToString( ( ( PrefixQuery ) query ).getPrefix() ) ) + "%"; + } else if ( query instanceof TermQuery && isTermGlobal( ( ( TermQuery ) query ).getTerm() ) ) { + if ( allowWildcards ) { + return escapeLike( termToString( ( ( TermQuery ) query ).getTerm() ) ); + } else { + return termToString( ( ( TermQuery ) query ).getTerm() ); + } + } + return null; + } + + @Nullable + public static URI prepareTermUriQuery( SearchSettings settings ) throws SearchException { + Query query = parseSafely( settings, createQueryParser() ); + if ( query instanceof TermQuery ) { + Term term = ( ( TermQuery ) query ).getTerm(); + return tryParseUri( term ); + } + return null; + } + + /** + * Check if a given term is 
global (i.e. not fielded). + *

+ * This includes the corner case when a term is a URI and would be parsed as a fielded term. + */ + private static boolean isTermGlobal( Term term ) { + return term.field().isEmpty() || tryParseUri( term ) != null; + } + + /** + * Extract a suitable string from a term, detecting URIs that would be parsed as a fielded term. + */ + private static String termToString( Term term ) { + URI uri; + if ( ( uri = tryParseUri( term ) ) != null ) { + return uri.toString(); + } else { + return term.text(); + } + } + + @Nullable + private static URI tryParseUri( Term term ) { + if ( term.text().startsWith( "http://" ) || term.text().startsWith( "https://" ) ) { + try { + return new URI( term.text() ); + } catch ( URISyntaxException e ) { + // ignore, it will be treated as a term term + } + } else if ( ( term.field().equals( "http" ) || term.field().equals( "https" ) ) && term.text().startsWith( "//" ) ) { + try { + return new URI( term.field() + ":" + term.text() ); + } catch ( URISyntaxException e ) { + // ignore, it will be treated as a fielded term + } + } + return null; + } + + private static String escapeLike( String s ) { + return s.replaceAll( "[%_\\\\]", "\\\\$0" ); + } + + /** + * Check if the query is a wildcard query. 
+ */ + public static boolean isWildcard( SearchSettings settings ) { + try { + return isWildcard( createQueryParser().parse( settings.getQuery() ) ); + } catch ( ParseException e ) { + return false; + } + } + + private static boolean isWildcard( Query query ) { + if ( query instanceof BooleanQuery ) { + for ( BooleanClause clause : ( ( BooleanQuery ) query ) ) { + // prohibited clauses are not used for database search + if ( !clause.isProhibited() ) { + return isWildcard( clause.getQuery() ); + } + } + } + return query instanceof WildcardQuery || query instanceof PrefixQuery; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/package-info.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/package-info.java new file mode 100644 index 0000000000..866cb4520a --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/package-info.java @@ -0,0 +1,7 @@ +/** + * + */ +@ParametersAreNonnullByDefault +package ubic.gemma.core.search.lucene; + +import javax.annotation.ParametersAreNonnullByDefault; \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/CompositeSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/CompositeSearchSource.java index b3432b676c..524eb84413 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/CompositeSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/CompositeSearchSource.java @@ -2,6 +2,7 @@ import lombok.extern.apachecommons.CommonsLog; import org.apache.commons.lang3.time.StopWatch; +import org.springframework.util.Assert; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchResultSet; @@ -10,6 +11,7 @@ import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.search.SearchSettings; +import 
ubic.gemma.model.expression.BlacklistedEntity; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; @@ -28,7 +30,15 @@ /** * A search source constituted of multiple other sources. - * + *

+ * Sources are used in the order they are passed to the {@link #CompositeSearchSource(List)} constructor. + *

+ * This source checks if the {@link SearchSource} are accepted by each individual source with + * {@link SearchSource#accepts(SearchSettings)} and subsequently delegate the operation. + *

+ * It also supports logging of the time spent by each source and the number of results found. This is done at the DEBUG + * level unless the value set by {@link #setWarningThresholdMills(int)} or {@link #setFastWarningThresholdMillis(int)} + * is exceeded in which case WARNING is used. * @author poirigui */ @CommonsLog @@ -36,28 +46,56 @@ public class CompositeSearchSource implements SearchSource { private final List sources; + private int fastWarningThresholdMillis = 100; + private int warningThresholdMills = 1000; + public CompositeSearchSource( List sources ) { this.sources = sources; } + /** + * Threshold in milliseconds for a warning to be logged when searching with {@link ubic.gemma.model.common.search.SearchSettings.SearchMode#FAST}. + *

+ * The default is 100 ms. + */ + public void setFastWarningThresholdMillis( int fastWarningThresholdMillis ) { + Assert.isTrue( fastWarningThresholdMillis >= 0 ); + this.fastWarningThresholdMillis = fastWarningThresholdMillis; + } + + /** + * Threshold in milliseconds for a warning to be logged. + *

+ * The default is 1000 ms. + */ + public void setWarningThresholdMills( int warningThresholdMills ) { + Assert.isTrue( warningThresholdMills >= 0 ); + this.warningThresholdMills = warningThresholdMills; + } + + @Override + public boolean accepts( SearchSettings settings ) { + return sources.stream().anyMatch( s -> s.accepts( settings ) ); + } + @Override public Collection> searchArrayDesign( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchArrayDesign( settings ), ArrayDesign.class ); + return searchWith( settings, SearchSource::searchArrayDesign, ArrayDesign.class ); } @Override public Collection> searchBibliographicReference( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchBibliographicReference( settings ), BibliographicReference.class ); + return searchWith( settings, SearchSource::searchBibliographicReference, BibliographicReference.class ); } @Override public Collection> searchExperimentSet( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchExperimentSet( settings ), ExpressionExperimentSet.class ); + return searchWith( settings, SearchSource::searchExperimentSet, ExpressionExperimentSet.class ); } @Override public Collection> searchBioSequence( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchBioSequence( settings ), BioSequence.class ); + return searchWith( settings, SearchSource::searchBioSequence, BioSequence.class ); } @Override @@ -73,7 +111,7 @@ public Collection> searchBioSequenceAndGene( SearchSettings sett @Override public Collection> searchCompositeSequence( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchCompositeSequence( settings ), CompositeSequence.class ); + return searchWith( settings, SearchSource::searchCompositeSequence, CompositeSequence.class ); } @Override @@ -89,45 +127,69 @@ public Collection> searchCompositeSequenceAndGene( 
SearchSetting @Override public Collection> searchExpressionExperiment( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchExpressionExperiment( settings ), ExpressionExperiment.class ); + return searchWith( settings, SearchSource::searchExpressionExperiment, ExpressionExperiment.class ); } @Override public Collection> searchGene( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchGene( settings ), Gene.class ); + return searchWith( settings, SearchSource::searchGene, Gene.class ); } @Override public Collection> searchGeneSet( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchGeneSet( settings ), GeneSet.class ); + return searchWith( settings, SearchSource::searchGeneSet, GeneSet.class ); + } + + @Override + public Collection> searchBlacklistedEntities( SearchSettings settings ) throws SearchException { + return searchWith( settings, SearchSource::searchBlacklistedEntities, BlacklistedEntity.class ); } - @FunctionalInterface - public interface SearchFunction { - Collection> apply( SearchSource searchSource ) throws SearchException; + private interface SearchFunction { + Collection> apply( SearchSource searchSource, SearchSettings settings ) throws SearchException; } - private Collection> searchWith( SearchFunction func, Class clazz ) throws SearchException { + private Collection> searchWith( SearchSettings settings, SearchFunction func, Class clazz ) throws SearchException { StopWatch timer = StopWatch.createStarted(); - Set> results = new SearchResultSet<>(); + Set> results = new SearchResultSet<>( settings ); long[] timeSpentBySource = new long[sources.size()]; int[] foundItemsBySource = new int[sources.size()]; int[] newItemsBySource = new int[sources.size()]; for ( int i = 0; i < sources.size(); i++ ) { long timeBefore = timer.getTime( TimeUnit.MILLISECONDS ); - int sizeBefore = results.size(); SearchSource source = sources.get( i ); - Collection> r 
= func.apply( source ); - results.addAll( r ); - foundItemsBySource[i] = r.size(); - newItemsBySource[i] = results.size() - sizeBefore; + if ( source.accepts( settings ) ) { + int sizeBefore = results.size(); + Collection> r = func.apply( source, settings ); + results.addAll( r ); + foundItemsBySource[i] = r.size(); + newItemsBySource[i] = results.size() - sizeBefore; + } else { + foundItemsBySource[i] = 0; + newItemsBySource[i] = 0; + } timeSpentBySource[i] = timer.getTime( TimeUnit.MILLISECONDS ) - timeBefore; } timer.stop(); - boolean shouldWarn = timer.getTime( TimeUnit.MILLISECONDS ) > 200; + boolean shouldWarn; + switch ( settings.getMode() ) { + case FAST: + shouldWarn = timer.getTime() > Math.min( fastWarningThresholdMillis, warningThresholdMills ); + break; + case BALANCED: + shouldWarn = timer.getTime() > warningThresholdMills; + break; + case ACCURATE: + default: + shouldWarn = false; + } if ( shouldWarn || log.isDebugEnabled() ) { - String breakdownBySource = IntStream.range( 0, sources.size() ).mapToObj( i -> String.format( "source: %s, found items: %d, found items (novel): %d, time spent: %d ms", sources.get( i ).getClass().getSimpleName(), foundItemsBySource[i], newItemsBySource[i], timeSpentBySource[i] ) ).collect( Collectors.joining( "; " ) ); - String message = String.format( "Found %d %s results in %d ms (%s)", results.size(), clazz.getSimpleName(), timer.getTime( TimeUnit.MILLISECONDS ), breakdownBySource ); + String breakdownBySource = IntStream.range( 0, sources.size() ) + .mapToObj( i -> String.format( "source: %s, found items: %d, found items (novel): %d, time spent: %d ms", + sources.get( i ).getClass().getSimpleName(), foundItemsBySource[i], newItemsBySource[i], timeSpentBySource[i] ) ) + .collect( Collectors.joining( "; " ) ); + String message = String.format( "Found %d %s results in %d ms (%s)", results.size(), clazz.getSimpleName(), + timer.getTime( TimeUnit.MILLISECONDS ), breakdownBySource ); if ( shouldWarn ) { log.warn( message ); } 
else { diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSource.java index d3076e00fe..df6a28b0dd 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSource.java @@ -2,25 +2,29 @@ import gemma.gsec.util.SecurityUtil; import lombok.extern.apachecommons.CommonsLog; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.Ordered; import org.springframework.stereotype.Component; import ubic.gemma.core.genome.gene.service.GeneService; import ubic.gemma.core.genome.gene.service.GeneSetService; +import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchResultSet; import ubic.gemma.core.search.SearchSource; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.search.SearchSettings; +import ubic.gemma.model.expression.BlacklistedEntity; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.model.genome.gene.GeneSet; +import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; +import ubic.gemma.persistence.service.expression.experiment.BlacklistedEntityService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import 
ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService; import ubic.gemma.persistence.service.genome.biosequence.BioSequenceService; @@ -30,8 +34,8 @@ import java.util.*; import java.util.stream.Collectors; -import static ubic.gemma.core.search.source.DatabaseSearchSourceUtils.prepareDatabaseQuery; -import static ubic.gemma.core.search.source.DatabaseSearchSourceUtils.prepareDatabaseQueryForInexactMatch; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.isWildcard; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.prepareDatabaseQuery; /** * Search source for direct database results. @@ -43,7 +47,9 @@ */ @Component @CommonsLog -public class DatabaseSearchSource implements SearchSource { +public class DatabaseSearchSource implements SearchSource, Ordered { + + public static final String NCBI_GENE_ID_URI_PREFIX = "http://purl.org/commons/record/ncbi_gene/"; /** * Score when a result is matched exactly by numerical ID. @@ -69,6 +75,8 @@ public class DatabaseSearchSource implements SearchSource { */ private final double INDIRECT_HIT_PENALTY = 0.8; + @Autowired + private ArrayDesignService arrayDesignService; @Autowired private BioSequenceService bioSequenceService; @Autowired @@ -83,74 +91,121 @@ public class DatabaseSearchSource implements SearchSource { private GeneSetService geneSetService; @Autowired private ExpressionExperimentSetService experimentSetService; + @Autowired + private BlacklistedEntityService blacklistedEntityService; + + @Override + public int getOrder() { + return Ordered.HIGHEST_PRECEDENCE; + } + + @Override + public boolean accepts( SearchSettings settings ) { + return settings.isUseDatabase(); + } /** + * A general search for array designs. + *

+ * This search does both an database search and a compass search. This is also contains an underlying + * {@link CompositeSequence} search, returning the {@link ArrayDesign} collection for the given composite sequence + * search string (the returned collection of array designs does not contain duplicates). + *

* Searches the DB for array designs which have composite sequences whose names match the given search string. * Because of the underlying database search, this is acl aware. That is, returned array designs are filtered based * on access control list (ACL) permissions. */ @Override - public Collection> searchArrayDesign( SearchSettings settings ) { - if ( !settings.isUseDatabase() ) + public Collection> searchArrayDesign( SearchSettings settings ) throws SearchException { + StopWatch watch = StopWatch.createStarted(); + String query = prepareDatabaseQuery( settings ); + if ( query == null ) { return Collections.emptySet(); + } - StopWatch watch = StopWatch.createStarted(); + SearchResultSet results = new SearchResultSet<>( settings ); - Collection adSet = new HashSet<>(); + ArrayDesign shortNameResult = arrayDesignService.findByShortName( query ); + if ( shortNameResult != null ) { + results.add( SearchResult.from( ArrayDesign.class, shortNameResult, DatabaseSearchSource.MATCH_BY_SHORT_NAME_SCORE, null, "ArrayDesignService.findByShortName" ) ); + return results; + } + + Collection nameResult = arrayDesignService.findByName( query ); + if ( nameResult != null && !nameResult.isEmpty() ) { + for ( ArrayDesign ad : nameResult ) { + results.add( SearchResult.from( ArrayDesign.class, ad, DatabaseSearchSource.MATCH_BY_NAME_SCORE, null, "ArrayDesignService.findByShortName" ) ); + } + return results; + } + + Collection altNameResults = arrayDesignService.findByAlternateName( query ); + for ( ArrayDesign arrayDesign : altNameResults ) { + results.add( SearchResult.from( ArrayDesign.class, arrayDesign, 0.9, null, "ArrayDesignService.findByAlternateName" ) ); + } + + Collection manufacturerResults = arrayDesignService.findByManufacturer( query ); + for ( ArrayDesign arrayDesign : manufacturerResults ) { + results.add( SearchResult.from( ArrayDesign.class, arrayDesign, 0.9, null, "ArrayDesignService.findByManufacturer" ) ); + } // search by exact composite sequence name - 
Collection matchedCs = compositeSequenceService.findByName( prepareDatabaseQuery( settings ) ); + Collection matchedCs = compositeSequenceService.findByName( query ); for ( CompositeSequence sequence : matchedCs ) { - adSet.add( sequence.getArrayDesign() ); + ArrayDesign entity = sequence.getArrayDesign(); + results.add( SearchResult.from( ArrayDesign.class, entity, INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, null, "CompositeSequenceService.findByName" ) ); } watch.stop(); - if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log - .info( "Array Design Composite Sequence DB search for " + settings + " took " + watch.getTime() - + " ms" + " found " + adSet.size() + " Ads" ); + if ( watch.getTime() > 1000 ) { + DatabaseSearchSource.log.warn( String.format( "Array Design DB search for %s with '%s' took %d ms found %d Ads", + settings, query, watch.getTime(), results.size() ) ); + } - return toSearchResults( ArrayDesign.class, adSet, MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByName" ); + return results; } @Override - - public Collection> searchExperimentSet( SearchSettings settings ) { - return toSearchResults( ExpressionExperimentSet.class, this.experimentSetService.findByName( settings.getQuery() ), MATCH_BY_NAME_SCORE, "ExperimentSetService.findByName" ); + public Collection> searchExperimentSet( SearchSettings settings ) throws SearchException { + String query = prepareDatabaseQuery( settings ); + if ( query == null ) { + return Collections.emptySet(); + } + return toSearchResults( settings, ExpressionExperimentSet.class, this.experimentSetService.findByName( query ), MATCH_BY_NAME_SCORE, "ExperimentSetService.findByName" ); } /** * A database search for biosequences. Biosequence names are already indexed by compass... 
*/ @Override - public Collection> searchBioSequence( SearchSettings settings ) { - if ( !settings.isUseDatabase() ) - return Collections.emptySet(); - + public Collection> searchBioSequence( SearchSettings settings ) throws SearchException { StopWatch watch = StopWatch.createStarted(); String searchString = prepareDatabaseQuery( settings ); + if ( searchString == null ) { + return Collections.emptySet(); + } Collection bs = bioSequenceService.findByName( searchString ); // bioSequenceService.thawRawAndProcessed( bs ); - Collection> bioSequenceList = toSearchResults( BioSequence.class, bs, MATCH_BY_NAME_SCORE, "BioSequenceService.findByName" ); + Collection> bioSequenceList = toSearchResults( settings, BioSequence.class, bs, MATCH_BY_NAME_SCORE, "BioSequenceService.findByName" ); watch.stop(); - if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log - .info( "BioSequence DB search for " + searchString + " took " + watch.getTime() + " ms and found" - + bioSequenceList.size() + " BioSequences" ); + if ( watch.getTime() > 1000 ) { + DatabaseSearchSource.log.warn( String.format( "BioSequence DB search for %s with '%s' took %d ms and found %d BioSequences", + settings, searchString, watch.getTime(), bioSequenceList.size() ) ); + } return bioSequenceList; } @Override - public Collection> searchBioSequenceAndGene( SearchSettings settings, @Nullable Collection> previousGeneSearchResults ) { + public Collection> searchBioSequenceAndGene( SearchSettings settings, @Nullable Collection> previousGeneSearchResults ) throws SearchException { return new HashSet<>( this.searchBioSequence( settings ) ); } @Override - public Collection> searchCompositeSequence( SearchSettings settings ) { + public Collection> searchCompositeSequence( SearchSettings settings ) throws SearchException { return this.searchCompositeSequenceAndPopulateGenes( settings, Collections.emptySet() ); } @@ -158,8 +213,8 @@ public Collection> searchCompositeSequence( Sear * Search the DB for composite sequences 
and the genes that are matched to them. */ @Override - public Collection> searchCompositeSequenceAndGene( SearchSettings settings ) { - Set> geneSet = new SearchResultSet<>(); + public Collection> searchCompositeSequenceAndGene( SearchSettings settings ) throws SearchException { + Set> geneSet = new SearchResultSet<>( settings ); Collection> matchedCs = this.searchCompositeSequenceAndPopulateGenes( settings, geneSet ); Collection> combinedResults = new HashSet<>(); combinedResults.addAll( geneSet ); @@ -167,34 +222,34 @@ public Collection> searchCompositeSequenceAndGene( SearchSetting return combinedResults; } - private Collection> searchCompositeSequenceAndPopulateGenes( SearchSettings settings, Set> geneSet ) { - if ( !settings.isUseDatabase() ) - return Collections.emptySet(); - + private Collection> searchCompositeSequenceAndPopulateGenes( SearchSettings settings, Set> geneSet ) throws SearchException { StopWatch watch = StopWatch.createStarted(); String searchString = prepareDatabaseQuery( settings ); + if ( searchString == null ) { + return Collections.emptySet(); + } ArrayDesign ad = settings.getPlatformConstraint(); // search by exact composite sequence name - Collection> matchedCs = new SearchResultSet<>(); + Collection> matchedCs = new SearchResultSet<>( settings ); if ( ad != null ) { CompositeSequence cs = compositeSequenceService.findByName( ad, searchString ); if ( cs != null ) - matchedCs.add( SearchResult.from( CompositeSequence.class, cs, MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByName" ) ); + matchedCs.add( SearchResult.from( CompositeSequence.class, cs, MATCH_BY_NAME_SCORE, null, "CompositeSequenceService.findByName" ) ); } else { - matchedCs = toSearchResults( CompositeSequence.class, compositeSequenceService.findByName( searchString ), MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByName" ); + matchedCs = toSearchResults( settings, CompositeSequence.class, compositeSequenceService.findByName( searchString ), MATCH_BY_NAME_SCORE, 
"CompositeSequenceService.findByName" ); } /* * Search by biosequence */ - if ( matchedCs.isEmpty() ) { + if ( matchedCs.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { Collection csViaBioSeq = compositeSequenceService.findByBioSequenceName( searchString ); if ( ad != null ) { csViaBioSeq.removeIf( c -> !c.getArrayDesign().equals( ad ) ); } - matchedCs.addAll( toSearchResults( CompositeSequence.class, csViaBioSeq, INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByBioSequenceName" ) ); + matchedCs.addAll( toSearchResults( settings, CompositeSequence.class, csViaBioSeq, INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByBioSequenceName" ) ); } /* @@ -211,9 +266,9 @@ private Collection> searchCompositeSequenceAndPo // results from the database are always pre-filled assert g.getResultObject() != null; if ( settings.getPlatformConstraint() != null ) { - matchedCs.addAll( toSearchResults( CompositeSequence.class, compositeSequenceService.findByGene( g.getResultObject(), settings.getPlatformConstraint() ), INDIRECT_HIT_PENALTY * g.getScore(), "CompositeSequenceService.findByGene with platform constraint" ) ); + matchedCs.addAll( toSearchResults( settings, CompositeSequence.class, compositeSequenceService.findByGene( g.getResultObject(), settings.getPlatformConstraint() ), INDIRECT_HIT_PENALTY * g.getScore(), "CompositeSequenceService.findByGene with platform constraint" ) ); } else { - matchedCs.addAll( toSearchResults( CompositeSequence.class, compositeSequenceService.findByGene( g.getResultObject() ), INDIRECT_HIT_PENALTY * g.getScore(), "CompositeSequenceService.findByGene" ) ); + matchedCs.addAll( toSearchResults( settings, CompositeSequence.class, compositeSequenceService.findByGene( g.getResultObject() ), INDIRECT_HIT_PENALTY * g.getScore(), "CompositeSequenceService.findByGene" ) ); } } @@ -225,14 +280,13 @@ private Collection> searchCompositeSequenceAndPo for ( Collection genes : 
compositeSequenceService.getGenes( compositeSequences ).values() ) { // TODO: each individual CS have a potentially different score that should be reflected in the gene score, // but that would require knowing which CS matched which gene - geneSet.addAll( toSearchResults( Gene.class, genes, INDIRECT_HIT_PENALTY, "CompositeSequenceService.getGenes" ) ); + geneSet.addAll( toSearchResults( settings, Gene.class, genes, INDIRECT_HIT_PENALTY, "CompositeSequenceService.getGenes" ) ); } watch.stop(); if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log - .info( "Gene composite sequence DB search " + searchString + " took " + watch.getTime() + " ms, " - + geneSet.size() + " items." ); + DatabaseSearchSource.log.warn( String.format( "Gene composite sequence DB search for %s with '%s' took %d ms, %d items.", + settings, searchString, watch.getTime(), geneSet.size() ) ); return matchedCs; } @@ -245,15 +299,15 @@ private Collection> searchCompositeSequenceAndPo * @return {@link Collection} */ @Override - public Collection> searchExpressionExperiment( SearchSettings settings ) { - if ( !settings.isUseDatabase() ) - return Collections.emptySet(); - + public Collection> searchExpressionExperiment( SearchSettings settings ) throws SearchException { StopWatch watch = StopWatch.createStarted(); String query = prepareDatabaseQuery( settings ); + if ( query == null ) { + return Collections.emptySet(); + } - Collection> results = new SearchResultSet<>(); + Collection> results = new SearchResultSet<>( settings ); Collection ees = expressionExperimentService.findByName( query ); for ( ExpressionExperiment ee : ees ) { @@ -261,21 +315,21 @@ public Collection> searchExpressionExperiment } // in response to https://github.com/PavlidisLab/Gemma/issues/140, always keep going if admin. 
- if ( results.isEmpty() || SecurityUtil.isUserAdmin() ) { + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) || SecurityUtil.isUserAdmin() ) { ExpressionExperiment ee = expressionExperimentService.findByShortName( query ); if ( ee != null ) { results.add( SearchResult.from( ExpressionExperiment.class, ee, MATCH_BY_SHORT_NAME_SCORE, Collections.singletonMap( "shortName", ee.getShortName() ), "ExpressionExperimentService.findByShortName" ) ); } } - if ( results.isEmpty() || SecurityUtil.isUserAdmin() ) { + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) || SecurityUtil.isUserAdmin() ) { ees = expressionExperimentService.findByAccession( query ); // this will find split parts for ( ExpressionExperiment e : ees ) { results.add( SearchResult.from( ExpressionExperiment.class, e, MATCH_BY_ACCESSION_SCORE, Collections.singletonMap( "id", e.getId().toString() ), "ExpressionExperimentService.findByAccession" ) ); } } - if ( results.isEmpty() ) { + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { try { // maybe user put in a primary key value. 
ExpressionExperiment ee = expressionExperimentService.load( Long.parseLong( query ) ); @@ -296,8 +350,8 @@ public Collection> searchExpressionExperiment watch.stop(); if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log.warn( "DB Expression Experiment search for " + settings + " took " + watch.getTime() - + " ms and found " + results.size() + " EEs" ); + DatabaseSearchSource.log.warn( String.format( "DB Expression Experiment search for %s with '%s' took %d ms and found %d EEs", + settings, query, watch.getTime(), results.size() ) ); return results; } @@ -307,45 +361,40 @@ public Collection> searchExpressionExperiment * tables */ @Override - public Collection> searchGene( SearchSettings settings ) { - if ( !settings.isUseDatabase() ) - return Collections.emptySet(); - + public Collection> searchGene( SearchSettings settings ) throws SearchException { StopWatch watch = StopWatch.createStarted(); - String searchString; - if ( settings.isTermQuery() ) { - // then we can get the NCBI ID, maybe. - searchString = StringUtils.substringAfterLast( prepareDatabaseQuery( settings ), "/" ); - } else { - searchString = prepareDatabaseQuery( settings ); - } + Set> results = new SearchResultSet<>( settings ); - if ( StringUtils.isBlank( searchString ) ) - return Collections.emptySet(); - - Set> results = new SearchResultSet<>(); + String searchString = prepareDatabaseQuery( settings ); + if ( searchString != null ) { + // then we can get the NCBI ID, maybe. + if ( searchString.startsWith( NCBI_GENE_ID_URI_PREFIX ) ) { + searchString = searchString.substring( NCBI_GENE_ID_URI_PREFIX.length() ); + } - /* - * First search by accession. If we find it, stop. 
- */ - Gene result = null; - try { - result = geneService.findByNCBIId( Integer.parseInt( searchString ) ); - } catch ( NumberFormatException e ) { - // - } - if ( result != null ) { - results.add( SearchResult.from( Gene.class, result, MATCH_BY_ID_SCORE, "GeneService.findByNCBIId" ) ); - } else { - result = geneService.findByAccession( searchString, null ); + /* + * First search by accession. If we find it, stop. + */ + Gene result = null; + try { + result = geneService.findByNCBIId( Integer.parseInt( searchString ) ); + } catch ( NumberFormatException e ) { + // + } if ( result != null ) { - results.add( SearchResult.from( Gene.class, result, MATCH_BY_ACCESSION_SCORE, "GeneService.findByAccession" ) ); + results.add( SearchResult.from( Gene.class, result, MATCH_BY_ID_SCORE, null, "GeneService.findByNCBIId" ) ); + } else { + result = geneService.findByAccession( searchString, null ); + if ( result != null ) { + results.add( SearchResult.from( Gene.class, result, MATCH_BY_ACCESSION_SCORE, null, "GeneService.findByAccession" ) ); + } } } - if ( results.isEmpty() ) { - results.addAll( searchGeneExpanded( settings ) ); + // attempt to do an inexact search if no results were yielded + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { + searchGeneExpanded( settings, results ); } // filter by taxon @@ -355,9 +404,8 @@ public Collection> searchGene( SearchSettings settings ) { watch.stop(); if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log - .info( "Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found " - + results.size() + " genes" ); + DatabaseSearchSource.log.warn( String.format( "Gene DB search for %s with '%s' took %d ms and found %d genes", + settings, searchString, watch.getTime(), results.size() ) ); return results; } @@ -365,68 +413,88 @@ public Collection> searchGene( SearchSettings settings ) { /** * Expanded gene search used when a simple search does not yield results. 
*/ - private Collection> searchGeneExpanded( SearchSettings settings ) { - Set> results = new SearchResultSet<>(); + private void searchGeneExpanded( SearchSettings settings, Set> results ) throws SearchException { + String inexactString = prepareDatabaseQuery( settings, true ); + if ( inexactString == null ) { + return; + } - String exactString = prepareDatabaseQuery( settings ); - String inexactString = prepareDatabaseQueryForInexactMatch( settings ); + // trim all the unescaped reserved characters from the string to get the "exact" string + String exactString = inexactString.replaceAll( "([^\\\\])[%_\\\\]", "$1" ); // if the query is shortish, always do a wild card search. This gives better behavior in 'live // search' situations. If we do wildcards on very short queries we get too many results. if ( exactString.length() <= 1 ) { // case 0: we got no results yet, or user entered a very short string. We search only for exact matches. - results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbol( exactString ), MATCH_BY_OFFICIAL_SYMBOL_SCORE, "GeneService.findByOfficialSymbol" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbol( exactString ), MATCH_BY_OFFICIAL_SYMBOL_SCORE, "GeneService.findByOfficialSymbol" ) ); } else if ( exactString.length() <= 5 ) { - if ( settings.isWildcard() ) { + if ( isWildcard( settings ) ) { // case 2: user did ask for a wildcard, if the string is 2, 3, 4 or 5 characters. - results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbolInexact( inexactString ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbolInexact" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbolInexact( inexactString ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbolInexact" ) ); } else { // case 2: user did not ask for a wildcard, but we add it anyway, if the string is 2, 3, 4 or 5 characters. 
- results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbolInexact( inexactString + "%" ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbolInexact" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbolInexact( inexactString + "%" ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbolInexact" ) ); } } else { - if ( settings.isWildcard() ) { + if ( isWildcard( settings ) ) { // case 3: string is long enough, and user asked for wildcard. - results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbolInexact( inexactString ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbol" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbolInexact( inexactString ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbol" ) ); } else { // case 3: string is long enough, and user did not ask for wildcard. 
- results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbol( exactString ), MATCH_BY_OFFICIAL_SYMBOL_SCORE, "GeneService.findByOfficialSymbol" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbol( exactString ), MATCH_BY_OFFICIAL_SYMBOL_SCORE, "GeneService.findByOfficialSymbol" ) ); } } /* * If we found a match using official symbol or name, don't bother with this */ - if ( results.isEmpty() ) { - results.addAll( toSearchResults( Gene.class, geneService.findByAlias( exactString ), MATCH_BY_ALIAS_SCORE, "GeneService.findByAlias" ) ); + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { + results.addAll( toSearchResults( settings, Gene.class, geneService.findByAlias( exactString ), MATCH_BY_ALIAS_SCORE, "GeneService.findByAlias" ) ); Gene geneByEnsemblId = geneService.findByEnsemblId( exactString ); if ( geneByEnsemblId != null ) { - results.add( SearchResult.from( Gene.class, geneByEnsemblId, MATCH_BY_ACCESSION_SCORE, "GeneService.findByAlias" ) ); + results.add( SearchResult.from( Gene.class, geneByEnsemblId, MATCH_BY_ACCESSION_SCORE, null, "GeneService.findByAlias" ) ); } - results.addAll( toSearchResults( Gene.class, geneProductService.getGenesByName( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "GeneProductService.getGenesByName" ) ); - results.addAll( toSearchResults( Gene.class, geneProductService.getGenesByNcbiId( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_ACCESSION_SCORE, "GeneProductService.getGenesByNcbiId" ) ); - results.addAll( toSearchResults( Gene.class, bioSequenceService.getGenesByAccession( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_ACCESSION_SCORE, "BioSequenceService.GetGenesByAccession" ) ); - results.addAll( toSearchResults( Gene.class, bioSequenceService.getGenesByName( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "BioSequenceService.getGenesByName" ) ); + results.addAll( toSearchResults( settings, 
Gene.class, geneProductService.getGenesByName( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "GeneProductService.getGenesByName" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneProductService.getGenesByNcbiId( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_ACCESSION_SCORE, "GeneProductService.getGenesByNcbiId" ) ); + results.addAll( toSearchResults( settings, Gene.class, bioSequenceService.getGenesByAccession( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_ACCESSION_SCORE, "BioSequenceService.GetGenesByAccession" ) ); + results.addAll( toSearchResults( settings, Gene.class, bioSequenceService.getGenesByName( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "BioSequenceService.getGenesByName" ) ); } - - return results; } @Override - public Collection> searchGeneSet( SearchSettings settings ) { - if ( !settings.isUseDatabase() ) + public Collection> searchGeneSet( SearchSettings settings ) throws SearchException { + String query = prepareDatabaseQuery( settings ); + if ( query == null ) { return Collections.emptySet(); + } if ( settings.getTaxon() != null ) { - return toSearchResults( GeneSet.class, this.geneSetService.findByName( settings.getQuery(), settings.getTaxon() ), MATCH_BY_NAME_SCORE, "GeneSetService.findByNameWithTaxon" ); + return toSearchResults( settings, GeneSet.class, this.geneSetService.findByName( query, settings.getTaxon() ), MATCH_BY_NAME_SCORE, "GeneSetService.findByNameWithTaxon" ); } else { - return toSearchResults( GeneSet.class, this.geneSetService.findByName( settings.getQuery() ), MATCH_BY_NAME_SCORE, "GeneSetService.findByName" ); + return toSearchResults( settings, GeneSet.class, this.geneSetService.findByName( query ), MATCH_BY_NAME_SCORE, "GeneSetService.findByName" ); } } - private static Set> toSearchResults( Class resultType, Collection entities, double score, String source ) { + @Override + public Collection> searchBlacklistedEntities( SearchSettings settings ) throws SearchException { + 
Collection> blacklistedResults = new SearchResultSet<>( settings ); + String query = prepareDatabaseQuery( settings ); + + if ( query == null ) { + return Collections.emptySet(); + } + + BlacklistedEntity b = blacklistedEntityService.findByAccession( query ); + if ( b != null ) { + blacklistedResults.add( SearchResult.from( BlacklistedEntity.class, b, DatabaseSearchSource.MATCH_BY_ACCESSION_SCORE, null, "BlacklistedEntityService.findByAccession" ) ); + } + + return blacklistedResults; + } + + private static Set> toSearchResults( SearchSettings settings, Class resultType, Collection entities, double score, String source ) { return entities.stream() .filter( Objects::nonNull ) - .map( e -> SearchResult.from( resultType, e, score, source ) ) - .collect( Collectors.toCollection( SearchResultSet::new ) ); + .map( e -> SearchResult.from( resultType, e, score, null, source ) ) + .collect( Collectors.toCollection( () -> new SearchResultSet<>( settings ) ) ); } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSourceUtils.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSourceUtils.java deleted file mode 100644 index 74021cabd9..0000000000 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSourceUtils.java +++ /dev/null @@ -1,55 +0,0 @@ -package ubic.gemma.core.search.source; - -import ubic.gemma.model.common.search.SearchSettings; - -import java.util.Arrays; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -public class DatabaseSearchSourceUtils { - - /** - * List of reserved characters for Lucene. - *

- * See Apache Lucene - Query Parser Syntax - * for more details about special characters. - */ - private static final String[] LUCENE_SPECIAL_CHARACTERS = "+ - && || ! ( ) { } [ ] ^ \" ~ * ? : \\".split( " " ); - - private static final String LUCENE_SPECIAL_CHARACTERS_PATTERN = Arrays.stream( LUCENE_SPECIAL_CHARACTERS ) - .map( Pattern::quote ) - .collect( Collectors.joining( "|" ) ); - - /** - * Essentially the same as {@link #LUCENE_SPECIAL_CHARACTERS}, but excluding those that are supported. - */ - private static final String LUCENE_SPECIAL_CHARACTERS_BUT_WILDCARDS_PATTERN = Arrays.stream( LUCENE_SPECIAL_CHARACTERS ) - .filter( c -> !c.equals( String.valueOf( SearchSettings.WILDCARD_CHAR ) ) && !c.equals( String.valueOf( SearchSettings.SINGLE_WILDCARD_CHAR ) ) ) - .map( Pattern::quote ) - .collect( Collectors.joining( "|" ) ); - - /** - * Escape the query for a database match. - *

- * The resulting string is free from character that would usually be used for a free-text match. - */ - public static String prepareDatabaseQuery( SearchSettings settings ) { - return settings.getQuery() - // also remove wildcards, those are for inexact matches only - .replaceAll( LUCENE_SPECIAL_CHARACTERS_PATTERN, "" ); - } - - /** - * Obtain a query suitable for an inexact match (using a LIKE SQL expression). - *

- * This query supports wildcards ('*' and '?'), all other special characters are stripped. - */ - public static String prepareDatabaseQueryForInexactMatch( SearchSettings settings ) { - return settings.getQuery() - .replaceAll( LUCENE_SPECIAL_CHARACTERS_BUT_WILDCARDS_PATTERN, "" ) - .replaceAll( "%", "\\\\%" ) - .replaceAll( "_", "\\\\_" ) - .replace( SearchSettings.WILDCARD_CHAR, '%' ) - .replace( SearchSettings.SINGLE_WILDCARD_CHAR, '_' ); - } -} diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java index cf41a0dea3..ffd351b7fc 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java @@ -5,19 +5,23 @@ import org.apache.commons.lang3.time.StopWatch; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.queryParser.MultiFieldQueryParser; +import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.util.Version; import org.hibernate.SessionFactory; import org.hibernate.search.FullTextQuery; import org.hibernate.search.FullTextSession; import org.hibernate.search.Search; -import org.hibernate.search.query.dsl.QueryBuilder; import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; +import ubic.gemma.core.search.FieldAwareSearchSource; +import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; -import ubic.gemma.core.search.SearchSource; +import ubic.gemma.core.search.lucene.LuceneHighlighter; import 
ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReference; @@ -33,13 +37,17 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.parseSafely; + /** * Search source based on Hibernate Search. * @author poirigui */ @Component @CommonsLog -public class HibernateSearchSource implements SearchSource, InitializingBean { +public class HibernateSearchSource implements FieldAwareSearchSource, InitializingBean { + + private static final double FULL_TEXT_SCORE_PENALTY = 0.9; private static final Class[] SEARCHABLE_CLASSES = new Class[] { ExpressionExperiment.class, @@ -58,9 +66,10 @@ public class HibernateSearchSource implements SearchSource, InitializingBean { "fullTextUri", "keywords.term", "meshTerms.term", "pubAccession.accession", "title" }; private static String[] DATASET_FIELDS = { - "shortName", "name", "description", "bioAssays.name", "bioAssays.description", "bioAssays.accession.accession", - "bioAssays.sampleUsed.name", "bioAssays.sampleUsed.characteristics.value", - "bioAssays.sampleUsed.characteristics.valueUri", "characteristics.value", "characteristics.valueUri", + "shortName", "name", "description", "accession.accession", + "bioAssays.name", "bioAssays.description", "bioAssays.accession.accession", "bioAssays.sampleUsed.name", + "bioAssays.sampleUsed.characteristics.value", "bioAssays.sampleUsed.characteristics.valueUri", + "characteristics.value", "characteristics.valueUri", "experimentalDesign.name", "experimentalDesign.description", "experimentalDesign.experimentalFactors.name", "experimentalDesign.experimentalFactors.description", "experimentalDesign.experimentalFactors.category.categoryUri", @@ -89,9 +98,7 @@ public class HibernateSearchSource implements SearchSource, InitializingBean { private static String[] COMPOSITE_SEQUENCE_FIELDS = { "name", "description" }; - private 
static String[] prefix( String p, String... fields ) { - return Arrays.stream( fields ).map( f -> p + f ).toArray( String[]::new ); - } + private static final Map, Set> ALL_FIELDS = new HashMap<>(); static { DATASET_FIELDS = ArrayUtils.addAll( DATASET_FIELDS, prefix( "primaryPublication.", PUBLICATION_FIELDS ) ); @@ -100,6 +107,16 @@ private static String[] prefix( String p, String... fields ) { GENE_SET_FIELDS = ArrayUtils.addAll( GENE_SET_FIELDS, prefix( "literatureSources.", PUBLICATION_FIELDS ) ); GENE_SET_FIELDS = ArrayUtils.addAll( GENE_SET_FIELDS, prefix( "members.gene.", GENE_FIELDS ) ); COMPOSITE_SEQUENCE_FIELDS = ArrayUtils.addAll( COMPOSITE_SEQUENCE_FIELDS, prefix( "biologicalCharacteristic.", BIO_SEQUENCE_FIELDS ) ); + ALL_FIELDS.put( ExpressionExperiment.class, new HashSet<>( Arrays.asList( DATASET_FIELDS ) ) ); + ALL_FIELDS.put( ArrayDesign.class, new HashSet<>( Arrays.asList( PLATFORM_FIELDS ) ) ); + ALL_FIELDS.put( CompositeSequence.class, new HashSet<>( Arrays.asList( COMPOSITE_SEQUENCE_FIELDS ) ) ); + ALL_FIELDS.put( BioSequence.class, new HashSet<>( Arrays.asList( BIO_SEQUENCE_FIELDS ) ) ); + ALL_FIELDS.put( Gene.class, new HashSet<>( Arrays.asList( GENE_FIELDS ) ) ); + ALL_FIELDS.put( GeneSet.class, new HashSet<>( Arrays.asList( GENE_SET_FIELDS ) ) ); + } + + private static String[] prefix( String p, String... 
fields ) { + return Arrays.stream( fields ).map( f -> p + f ).toArray( String[]::new ); } @Autowired @@ -120,60 +137,68 @@ public void afterPropertiesSet() throws Exception { } @Override - public Collection> searchArrayDesign( SearchSettings settings ) throws HibernateSearchException { + public Set getFields( Class entityClass ) { + return ALL_FIELDS.getOrDefault( entityClass, Collections.emptySet() ); + } + + @Override + public boolean accepts( SearchSettings settings ) { + return settings.isUseIndices(); + } + + @Override + public Collection> searchArrayDesign( SearchSettings settings ) throws SearchException { return searchFor( settings, ArrayDesign.class, PLATFORM_FIELDS ); } @Override - public Collection> searchBibliographicReference( SearchSettings settings ) throws HibernateSearchException { + public Collection> searchBibliographicReference( SearchSettings settings ) throws SearchException { return searchFor( settings, BibliographicReference.class, PUBLICATION_FIELDS ); } @Override - public Collection> searchExperimentSet( SearchSettings settings ) throws HibernateSearchException { + public Collection> searchExperimentSet( SearchSettings settings ) throws SearchException { return searchFor( settings, ExpressionExperimentSet.class, EXPERIMENT_SET_FIELDS ); } @Override - public Collection> searchBioSequence( SearchSettings settings ) throws HibernateSearchException { + public Collection> searchBioSequence( SearchSettings settings ) throws SearchException { return searchFor( settings, BioSequence.class, BIO_SEQUENCE_FIELDS ); } @Override - public Collection> searchCompositeSequence( SearchSettings settings ) throws HibernateSearchException { + public Collection> searchCompositeSequence( SearchSettings settings ) throws SearchException { return searchFor( settings, CompositeSequence.class, COMPOSITE_SEQUENCE_FIELDS ); } @Override - public Collection> searchExpressionExperiment( SearchSettings settings ) throws HibernateSearchException { + public Collection> 
searchExpressionExperiment( SearchSettings settings ) throws SearchException { return searchFor( settings, ExpressionExperiment.class, DATASET_FIELDS ); } @Override - public Collection> searchGene( SearchSettings settings ) throws HibernateSearchException { + public Collection> searchGene( SearchSettings settings ) throws SearchException { return searchFor( settings, Gene.class, GENE_FIELDS ); } @Override - public Collection> searchGeneSet( SearchSettings settings ) throws HibernateSearchException { + public Collection> searchGeneSet( SearchSettings settings ) throws SearchException { return searchFor( settings, GeneSet.class, GENE_SET_FIELDS ); } - private Collection> searchFor( SearchSettings settings, Class clazz, String... fields ) throws HibernateSearchException { + private Collection> searchFor( SearchSettings settings, Class clazz, String... fields ) throws SearchException { try { FullTextSession fullTextSession = Search.getFullTextSession( sessionFactory.getCurrentSession() ); - QueryBuilder queryBuilder = fullTextSession.getSearchFactory().buildQueryBuilder().forEntity( clazz ) - .get(); - Query query = queryBuilder.keyword() - .onFields( fields ) - .matching( settings.getQuery() ) - .createQuery(); Analyzer analyzer = analyzers.get( clazz ); - Highlighter highlighter = settings.getHighlighter() != null ? settings.getHighlighter().createLuceneHighlighter( new QueryScorer( query ) ) : null; + QueryParser queryParser = new MultiFieldQueryParser( Version.LUCENE_36, fields, analyzer ); + Query query = parseSafely( settings, queryParser ); + Highlighter highlighter; String[] projection; - if ( highlighter != null ) { + if ( settings.getHighlighter() instanceof LuceneHighlighter ) { + highlighter = new Highlighter( ( ( LuceneHighlighter ) settings.getHighlighter() ).getFormatter(), new QueryScorer( query ) ); projection = new String[] { settings.isFillResults() ? 
FullTextQuery.THIS : FullTextQuery.ID, FullTextQuery.SCORE, FullTextQuery.DOCUMENT }; } else { + highlighter = null; projection = new String[] { settings.isFillResults() ? FullTextQuery.THIS : FullTextQuery.ID, FullTextQuery.SCORE }; } //noinspection unchecked @@ -185,9 +210,9 @@ private Collection> searchFor( SearchSe .list(); StopWatch timer = StopWatch.createStarted(); try { - Set fieldsSet = new HashSet<>( Arrays.asList( fields ) ); + DoubleSummaryStatistics stats = results.stream().mapToDouble( r -> ( Float ) r[1] ).summaryStatistics(); return results.stream() - .map( r -> searchResultFromRow( r, settings, highlighter, analyzer, fieldsSet, clazz ) ) + .map( r -> searchResultFromRow( r, settings, highlighter, analyzer, clazz, stats ) ) .filter( Objects::nonNull ) .collect( Collectors.toList() ); } finally { @@ -201,7 +226,13 @@ private Collection> searchFor( SearchSe } @Nullable - private SearchResult searchResultFromRow( Object[] row, SearchSettings settings, @Nullable Highlighter highlighter, Analyzer analyzer, Set fields, Class clazz ) { + private SearchResult searchResultFromRow( Object[] row, SearchSettings settings, @Nullable Highlighter highlighter, Analyzer analyzer, Class clazz, DoubleSummaryStatistics stats ) { + double score; + if ( stats.getMax() == stats.getMin() ) { + score = FULL_TEXT_SCORE_PENALTY; + } else { + score = FULL_TEXT_SCORE_PENALTY * ( ( Float ) row[1] - stats.getMin() ) / ( stats.getMax() - stats.getMin() ); + } if ( settings.isFillResults() ) { //noinspection unchecked T entity = ( T ) row[0]; @@ -209,9 +240,9 @@ private SearchResult searchResultFromRow( Object[] r // this happens if an entity is still in the cache, but was removed from the database return null; } - return SearchResult.from( clazz, entity, ( Float ) row[1], highlighter != null ? settings.highlightDocument( ( Document ) row[2], highlighter, analyzer, fields ) : null, "hibernateSearch" ); + return SearchResult.from( clazz, entity, score, highlighter != null ? 
settings.highlightDocument( ( Document ) row[2], highlighter, analyzer ) : null, "hibernateSearch" ); } else { - return SearchResult.from( clazz, ( Long ) row[0], ( Float ) row[1], highlighter != null ? settings.highlightDocument( ( Document ) row[2], highlighter, analyzer, fields ) : null, "hibernateSearch" ); + return SearchResult.from( clazz, ( Long ) row[0], score, highlighter != null ? settings.highlightDocument( ( Document ) row[2], highlighter, analyzer ) : null, "hibernateSearch" ); } } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java index 2b7baf7cd6..66875060da 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java @@ -1,11 +1,14 @@ package ubic.gemma.core.search.source; +import lombok.EqualsAndHashCode; +import lombok.Value; import lombok.extern.apachecommons.CommonsLog; import org.apache.commons.lang3.time.StopWatch; +import org.apache.lucene.queryParser.QueryParser; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import org.springframework.util.StringUtils; -import ubic.basecode.ontology.model.*; +import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; @@ -22,63 +25,186 @@ import java.net.URI; import java.util.*; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.extractTermsDnf; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.prepareTermUriQuery; + @Component @CommonsLog public class OntologySearchSource implements SearchSource { + /** + * Penalty applied on a full-text result. + */ + private static final double FULL_TEXT_SCORE_PENALTY = 0.9; + + /** + * Penalty for indirect hits. 
+ */ + private static final double INDIRECT_HIT_PENALTY = 0.9; + + + /** + * Special indicator for exact matches. Those are stripped out when computing summary statistics and then assigned + * the value of exactly 1.0. + */ + private static final double EXACT_MATCH_SCORE = -1.0; + @Autowired private OntologyService ontologyService; @Autowired private CharacteristicService characteristicService; + @Override + public boolean accepts( SearchSettings settings ) { + return settings.isUseCharacteristics(); + } + + /** + * Search via characteristics i.e. ontology terms. + *

+ * This is an important type of search but also a point of performance issues. Searches for "specific" terms are + * generally not a big problem (yielding less than 100 results); searches for "broad" terms can return numerous + * (thousands) + * results. + */ + @Override + public Collection> searchExpressionExperiment( final SearchSettings settings ) throws SearchException { + Collection> results = new SearchResultSet<>( settings ); + + StopWatch watch = StopWatch.createStarted(); + + log.debug( "Starting EE search for " + settings ); + /* + * Note that the AND is applied only within one entity type. The fix would be to apply AND at this + * level. + * + * The tricky part here is if the user has entered a boolean query. If they put in Parkinson's disease AND + * neuron, then we want to eventually return entities that are associated with both. We don't expect to find + * single characteristics that match both. + * + * But if they put in Parkinson's disease we don't want to do two queries. + */ + Set> subclauses = extractTermsDnf( settings ); + for ( Set subclause : subclauses ) { + Collection> classResults = this.searchExpressionExperiments( settings, subclause ); + if ( !classResults.isEmpty() ) { + log.debug( String.format( "Found %d EEs matching %s", classResults.size(), String.join( " AND ", subclause ) ) ); + } + results.addAll( classResults ); + // this is an OR query, so we can stop as soon as we've retrieved enough results + if ( isFilled( results, settings ) ) { + break; + } + } + + OntologySearchSource.log.debug( String.format( "ExpressionExperiment search: %s -> %d characteristic-based hits %d ms", + settings, results.size(), watch.getTime() ) ); + + return results; + } + + /** + * Search for the Experiment query in ontologies, including items that are associated with children of matching + * query terms. That is, 'brain' should return entities tagged as 'hippocampus'. 
It can handle AND in searches, so + * Parkinson's + * AND neuron finds items tagged with both of those terms. The use of OR is handled by the caller. + * + * @param settings search settings + * @param clause a conjunctive clause + * @return SearchResults of Experiments + */ + private SearchResultSet searchExpressionExperiments( SearchSettings settings, Set clause ) throws SearchException { + StopWatch watch = StopWatch.createStarted(); + + // we would have to first deal with the separate queries, and then apply the logic. + SearchResultSet results = new SearchResultSet<>( settings ); + + OntologySearchSource.log.debug( "Starting characteristic search for: " + settings + " matching " + String.join( " AND ", clause ) ); + for ( String subClause : clause ) { + // at this point, subclauses have already been parsed, so if they contain special characters, those must be + // escaped + String subClauseQuery = QueryParser.escape( subClause ); + // spaces should be quoted + if ( subClauseQuery.contains( " " ) ) { + subClauseQuery = "\"" + subClauseQuery + "\""; + } + SearchResultSet subqueryResults = doSearchExpressionExperiment( + settings.withQuery( subClauseQuery ) + ); + if ( results.isEmpty() ) { + results.addAll( subqueryResults ); + } else { + // this is our Intersection operation. + results.retainAll( subqueryResults ); + } + if ( watch.getTime() > 1000 ) { + OntologySearchSource.log.warn( String.format( "Characteristic EE search for '%s': %d hits retained so far; %dms", + subClause, results.size(), watch.getTime() ) ); + watch.reset(); + watch.start(); + } + } + + return results; + } + /** * Perform a Experiment search based on annotations (anchored in ontology terms) - it does not have to be one word, * it could be "parkinson's disease"; it can also be a URI. 
* * @return collection of SearchResults (Experiments) */ - @Override - public Collection> searchExpressionExperiment( SearchSettings settings ) throws SearchException { + private SearchResultSet doSearchExpressionExperiment( SearchSettings settings ) throws SearchException { // overall timer StopWatch watch = StopWatch.createStarted(); // per-step timer StopWatch timer = StopWatch.create(); - Set> results = new SearchResultSet<>(); + SearchResultSet results = new SearchResultSet<>( settings ); + + Collection ontologyResults = new HashSet<>(); - Collection terms = new HashSet<>(); + Collection matchingTerms; - // f the query is a term, find it - if ( settings.isTermQuery() ) { - String termUri = settings.getQuery(); - OntologyTerm resource; - OntologyTerm r2 = ontologyService.getTerm( termUri ); + // if the query is a term, find it directly + URI termUri = prepareTermUriQuery( settings ); + if ( termUri != null ) { + OntologyResult resource; + OntologyTerm r2 = ontologyService.getTerm( termUri.toString() ); if ( r2 != null ) { - resource = new SimpleOntologyTermWithScore( r2, 1.0 ); + assert r2.getUri() != null; + resource = new OntologyResult( r2, EXACT_MATCH_SCORE ); + matchingTerms = Collections.singleton( r2 ); } else { // attempt to guess a label from othe database - Characteristic c = characteristicService.findBestByUri( settings.getQuery() ); + Characteristic c = characteristicService.findBestByUri( termUri.toString() ); if ( c != null ) { assert c.getValueUri() != null; - resource = new SimpleOntologyTermWithScore( c.getValueUri(), c.getValue(), 1.0 ); + resource = new OntologyResult( c.getValueUri(), c.getValue(), EXACT_MATCH_SCORE ); } else { - resource = new SimpleOntologyTermWithScore( termUri, getLabelFromTermUri( termUri ), 1.0 ); + resource = new OntologyResult( termUri.toString(), getLabelFromTermUri( termUri ), EXACT_MATCH_SCORE ); } + matchingTerms = Collections.emptySet(); + } + ontologyResults.add( resource ); + } else { + // Search ontology 
classes matches to the full-text query + timer.reset(); + timer.start(); + matchingTerms = ontologyService.findTerms( settings.getQuery() ); + matchingTerms.stream() + // ignore bnodes + .filter( t -> t.getUri() != null ) + // the only possibility for being no score is that the query is an URI and the search didn't go through + // the search index + .map( t -> new OntologyResult( t, t.getScore() != null ? t.getScore() : EXACT_MATCH_SCORE ) ) + .forEach( ontologyResults::add ); + timer.stop(); + if ( timer.getTime() > 1000 ) { + log.warn( String.format( "Found %d ontology classes matching '%s' in %d ms", + matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); } - terms.add( resource ); - } - - // Search ontology classes matches to the query - timer.reset(); - timer.start(); - Collection matchingTerms = ontologyService.findTerms( settings.getQuery() ); - terms.addAll( matchingTerms ); - timer.stop(); - - if ( timer.getTime() > 100 ) { - log.warn( String.format( "Found %d ontology classes matching '%s' in %d ms", - matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); } // Search for child terms. @@ -86,28 +212,42 @@ public Collection> searchExpressionExperiment // TODO: move this logic in baseCode, this can be done far more efficiently with Jena API timer.reset(); timer.start(); - terms.addAll( ontologyService.getChildren( matchingTerms, false, true ) ); + // we don't know parent/child relation, so the best we can do is assigne the average full-text score + double avgScore = matchingTerms.stream() + .mapToDouble( t -> t.getScore() != null ? 
t.getScore() : 0 ) + .filter( s -> s != EXACT_MATCH_SCORE ) + .average() + .orElse( 0 ); + ontologyService.getChildren( matchingTerms, false, true ) + .stream() + // ignore bnodes + .filter( c -> c.getUri() != null ) + // small penalty for being indirectly matched + .map( c -> new OntologyResult( c, INDIRECT_HIT_PENALTY * avgScore ) ) + // if a children was already in terms, it will not be added again and thus its original score will + // be reflected in the results + .forEach( ontologyResults::add ); timer.stop(); - if ( timer.getTime() > 200 ) { + if ( timer.getTime() > 1000 ) { log.warn( String.format( "Found %d ontology subclasses or related terms for %d terms matching '%s' in %d ms", - terms.size() - matchingTerms.size(), matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); + ontologyResults.size() - matchingTerms.size(), matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); } } timer.reset(); timer.start(); - findExperimentsByTerms( terms, settings, results ); + findExperimentsByOntologyResults( ontologyResults, settings, results ); timer.stop(); - if ( timer.getTime() > 100 ) { + if ( timer.getTime() > 1000 ) { log.warn( String.format( "Retrieved %d datasets via %d characteristics in %d ms", - results.size(), terms.size(), timer.getTime() ) ); + results.size(), ontologyResults.size(), timer.getTime() ) ); } String message = String.format( "Found %d datasets by %d characteristic URIs for '%s' in %d ms", - results.size(), terms.size(), settings.getQuery(), watch.getTime() ); - if ( watch.getTime() > 300 ) { + results.size(), ontologyResults.size(), settings.getQuery(), watch.getTime() ); + if ( watch.getTime() > 1000 ) { log.warn( message ); } else { log.debug( message ); @@ -116,32 +256,35 @@ public Collection> searchExpressionExperiment return results; } - private void findExperimentsByTerms( Collection terms, SearchSettings settings, Set> results ) { + private void findExperimentsByOntologyResults( Collection terms, SearchSettings 
settings, SearchResultSet results ) { // URIs are case-insensitive in the database, so should be the mapping to labels Collection uris = new HashSet<>(); Map uri2value = new TreeMap<>( String.CASE_INSENSITIVE_ORDER ); Map uri2score = new TreeMap<>( String.CASE_INSENSITIVE_ORDER ); - // renormalize the scores in a [0, 1] range + // rescale the scores in a [0, 1] range DoubleSummaryStatistics summaryStatistics = terms.stream() - .map( OntologyTerm::getScore ) - .filter( Objects::nonNull ) + .map( OntologyResult::getScore ) .mapToDouble( s -> s ) + .filter( s -> s != EXACT_MATCH_SCORE ) .summaryStatistics(); - for ( OntologyTerm term : terms ) { - // bnodes can have null URIs, how annoying... - if ( term.getUri() != null ) { - uris.add( term.getUri() ); - uri2value.put( term.getUri(), term.getLabel() ); - uri2score.put( term.getUri(), term.getScore() != null ? term.getScore() / summaryStatistics.getMax() : summaryStatistics.getAverage() / summaryStatistics.getMax() ); + for ( OntologyResult term : terms ) { + uris.add( term.getUri() ); + uri2value.put( term.getUri(), term.getLabel() ); + if ( term.getScore() == EXACT_MATCH_SCORE ) { + uri2score.put( term.getUri(), 1.0 ); + } else if ( summaryStatistics.getMax() == summaryStatistics.getMin() ) { + uri2score.put( term.getUri(), FULL_TEXT_SCORE_PENALTY ); + } else { + uri2score.put( term.getUri(), FULL_TEXT_SCORE_PENALTY * ( term.getScore() - summaryStatistics.getMin() ) / ( summaryStatistics.getMax() - summaryStatistics.getMin() ) ); } } findExpressionExperimentsByUris( uris, uri2value, uri2score, settings, results ); } - private void findExpressionExperimentsByUris( Collection uris, Map uri2value, Map uri2score, SearchSettings settings, Set> results ) { + private void findExpressionExperimentsByUris( Collection uris, Map uri2value, Map uri2score, SearchSettings settings, SearchResultSet results ) { if ( isFilled( results, settings ) ) return; @@ -152,21 +295,21 @@ private void findExpressionExperimentsByUris( 
Collection uris, Map> hits, String field, double scoreMultiplier, Map uri2value, Map uri2score, SearchSettings settings, Set> results ) { + private void addExperimentsByUrisHits( Map> hits, String field, double scoreMultiplier, Map uri2value, Map uri2score, SearchSettings settings, SearchResultSet results ) { for ( Map.Entry> entry : hits.entrySet() ) { String uri = entry.getKey(); String value = uri2value.get( uri ); @@ -204,12 +347,11 @@ private static int getLimit( Collection /** * Extract a label for a term URI as per {@link OntologyTerm#getLabel()}. */ - static String getLabelFromTermUri( String termUri ) { - URI components = URI.create( termUri ); - String[] segments = components.getPath().split( "/" ); + static String getLabelFromTermUri( URI termUri ) { + String[] segments = termUri.getPath().split( "/" ); // use the fragment - if ( !StringUtils.isEmpty( components.getFragment() ) ) { - return partToTerm( components.getFragment() ); + if ( !StringUtils.isEmpty( termUri.getFragment() ) ) { + return partToTerm( termUri.getFragment() ); } // pick the last non-empty segment for ( int i = segments.length - 1; i >= 0; i-- ) { @@ -218,121 +360,34 @@ static String getLabelFromTermUri( String termUri ) { } } // as a last resort, return the parsed URI - return components.toString(); + return termUri.toString(); } private static String partToTerm( String part ) { return part.replaceFirst( "_", ":" ).toUpperCase(); } - /** - * Simple ontology resource with a score. 
- */ - private static class SimpleOntologyTermWithScore implements OntologyTerm { - - private static final Comparator COMPARATOR = Comparator - .comparing( OntologyResource::getScore, Comparator.nullsLast( Comparator.reverseOrder() ) ) - .thenComparing( OntologyResource::getUri, Comparator.nullsLast( Comparator.naturalOrder() ) ); - - private final String uri; - private final String label; - private final double score; + @Value + @EqualsAndHashCode(of = { "uri" }) + private static class OntologyResult { + String uri; + String label; + double score; - private SimpleOntologyTermWithScore( String uri, String label, double score ) { + private OntologyResult( String uri, String label, double score ) { this.uri = uri; this.label = label; this.score = score; } - public SimpleOntologyTermWithScore( OntologyTerm resource, double score ) { + public OntologyResult( OntologyTerm resource, double score ) { this.uri = resource.getUri(); - this.label = resource.getLabel(); + if ( resource.getLabel() != null ) { + this.label = resource.getLabel(); + } else { + this.label = resource.getLocalName(); + } this.score = score; } - - @Override - public String getUri() { - return uri; - } - - @Override - public String getLabel() { - return label; - } - - @Override - public boolean isObsolete() { - return false; - } - - @Override - public Double getScore() { - return score; - } - - @Override - public int compareTo( OntologyResource ontologyResource ) { - return Objects.compare( this, ontologyResource, COMPARATOR ); - } - - @Override - public Collection getAlternativeIds() { - return null; - } - - @Override - public Collection getAnnotations() { - return null; - } - - @Override - public Collection getChildren( boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes ) { - return null; - } - - @Override - public String getComment() { - return null; - } - - @Override - public Collection getIndividuals( boolean direct ) { - return null; - } - - @Override - public String 
getLocalName() { - return null; - } - - @Override - public Object getModel() { - return null; - } - - @Override - public Collection getParents( boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes ) { - return null; - } - - @Override - public Collection getRestrictions() { - return null; - } - - @Override - public String getTerm() { - return null; - } - - @Override - public boolean isRoot() { - return false; - } - - @Override - public boolean isTermObsolete() { - return false; - } } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/tasks/maintenance/IndexerTaskImpl.java b/gemma-core/src/main/java/ubic/gemma/core/tasks/maintenance/IndexerTaskImpl.java index 248a04cf56..8128f7ae37 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/tasks/maintenance/IndexerTaskImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/tasks/maintenance/IndexerTaskImpl.java @@ -53,7 +53,9 @@ public TaskResult call() throws Exception { if ( taskCommand.isIndexGeneSet() ) { classesToIndex.add( GeneSet.class ); } - indexerService.index( classesToIndex, 4 ); + for ( Class clazz : classesToIndex ) { + indexerService.index( clazz ); + } return new TaskResult( taskCommand, null ); } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java index c574fb1ea4..0065a8f8a0 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java @@ -1,9 +1,8 @@ package ubic.gemma.core.util; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; +import org.springframework.util.Assert; + +import java.util.*; /** * Utilities and algorithms for {@link List}. @@ -44,4 +43,41 @@ private static void fillMap( Map element2position, List list } } } + + /** + * Pad a collection to the next power of 2 with the given element. 
+ */ + public static List padToNextPowerOfTwo( List list, T elementForPadding ) { + int k = Integer.highestOneBit( list.size() ); + if ( list.size() == k ) { + return list; // already a power of 2 + } + return pad( list, elementForPadding, k << 1 ); + } + + /** + * Pad a collection with the given element. + */ + public static List pad( List list, T elementForPadding, int size ) { + Assert.isTrue( size >= list.size(), "Target size must be greater or equal to the collection size." ); + if ( list.size() == size ) { + return list; + } + List paddedList = new ArrayList<>( size ); + paddedList.addAll( list ); + for ( int j = list.size(); j < size; j++ ) { + paddedList.add( elementForPadding ); + } + return paddedList; + } + + public static List> batch( List list, int batchSize ) { + if ( batchSize == -1 ) { + return Collections.singletonList( list ); + } + int numberOfBatches = ( list.size() / batchSize ) + ( list.size() % batchSize > 0 ? 1 : 0 ); + int size = numberOfBatches * batchSize; + List paddedList = pad( list, list.get( list.size() - 1 ), size ); + return org.apache.commons.collections4.ListUtils.partition( paddedList, batchSize ); + } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/MailUtilsImpl.java b/gemma-core/src/main/java/ubic/gemma/core/util/MailUtilsImpl.java index 2c8381fa4c..0ea5501a52 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/util/MailUtilsImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/util/MailUtilsImpl.java @@ -25,7 +25,6 @@ import ubic.gemma.core.security.authentication.UserService; import ubic.gemma.model.common.auditAndSecurity.User; import ubic.gemma.persistence.util.MailEngine; -import ubic.gemma.persistence.util.Settings; /** * @author anton @@ -58,21 +57,13 @@ public void sendTaskCompletedNotificationEmail( EmailNotificationContext emailNo if ( emailAddress != null ) { MailUtilsImpl.log.info( "Sending email notification to " + emailAddress ); SimpleMailMessage msg = new SimpleMailMessage(); - msg.setTo( 
emailAddress ); - msg.setFrom( Settings.getAdminEmailAddress() ); - msg.setSubject( "Gemma task completed" ); - String logs = ""; if ( taskResult.getException() != null ) { logs += "Task failed with :\n"; logs += taskResult.getException().getMessage(); } - - msg.setText( - "A job you started on Gemma is completed (taskId=" + taskId + ", " + taskName + ")\n\n" + logs - + "\n" ); - - mailEngine.send( msg ); + String body = "A job you started on Gemma is completed (taskId=" + taskId + ", " + taskName + ")\n\n" + logs + "\n"; + mailEngine.sendMessage( emailAddress, "Gemma task completed", body ); } } } diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java index c18be25801..106ef9cfda 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java @@ -220,9 +220,6 @@ public void setMigratedToStatement( boolean migratedToStatement ) { @Override public int hashCode() { - if ( this.getId() != null ) { - return super.hashCode(); - } return Objects.hash( StringUtils.lowerCase( categoryUri != null ? categoryUri : category ), StringUtils.lowerCase( valueUri != null ? valueUri : value ) ); } diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java index b9069f2b51..ae323713a9 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java @@ -6,6 +6,37 @@ public class CharacteristicUtils { + /** + * Create a new characteristic that represents the category of a given characteristic. 
+ */ + public static Characteristic getCategory( Characteristic t ) { + Characteristic c = new Characteristic(); + c.setCategory( t.getCategory() ); + c.setCategoryUri( t.getCategoryUri() ); + return c; + } + + /** + * Check if the given characteristic is uncategorized. + */ + public static boolean isUncategorized( Characteristic c ) { + return c.getCategory() == null && c.getCategoryUri() == null; + } + + /** + * Check if the given characteristic has or is a free-text category. + */ + public static boolean isFreeTextCategory( Characteristic c ) { + return c.getCategory() != null && c.getCategoryUri() == null; + } + + /** + * Check if the given characteristic is a free-text value. + */ + public static boolean isFreeText( Characteristic c ) { + return c.getValue() != null && c.getValueUri() == null; + } + /** * Compare a pair of ontology terms. */ diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/ExternalDatabases.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/ExternalDatabases.java index cf946349d9..85c01385ab 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/description/ExternalDatabases.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/ExternalDatabases.java @@ -7,6 +7,7 @@ public final class ExternalDatabases { public static final String + GEO = "GEO", GENE = "gene", GO = "go", MULTIFUNCTIONALITY = "multifunctionality", diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java index 4e48b6f29f..e19dcc6bdd 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java @@ -22,10 +22,11 @@ import lombok.Data; import lombok.Singular; import lombok.With; -import org.apache.commons.lang3.StringUtils; import org.apache.lucene.analysis.Analyzer; import 
org.apache.lucene.document.Document; import ubic.gemma.core.search.Highlighter; +import ubic.gemma.core.search.OntologyHighlighter; +import ubic.gemma.core.search.lucene.LuceneHighlighter; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; @@ -50,14 +51,10 @@ @With public class SearchSettings implements Serializable { - public static final char - WILDCARD_CHAR = '*', - SINGLE_WILDCARD_CHAR = '?'; - public enum SearchMode { /** - * Prefer correctness over speed. + * Prefer correctness to speed. */ ACCURATE, /** @@ -108,10 +105,9 @@ public static SearchSettings bibliographicReferenceSearch( String query ) { * @param arrayDesign the array design to limit the search to * @return search settings */ - public static SearchSettings compositeSequenceSearch( String query, ArrayDesign arrayDesign ) { + public static SearchSettings compositeSequenceSearch( String query, @Nullable ArrayDesign arrayDesign ) { return builder().query( query ) .resultType( CompositeSequence.class ) - .resultType( ArrayDesign.class ) .platformConstraint( arrayDesign ) // TODO: check if this was specified in the original code .build(); } @@ -136,7 +132,7 @@ public static SearchSettings expressionExperimentSearch( String query ) { * @param taxon if you want to filter by taxon (can be null) * @return search settings */ - public static SearchSettings expressionExperimentSearch( String query, Taxon taxon ) { + public static SearchSettings expressionExperimentSearch( String query, @Nullable Taxon taxon ) { return builder() .query( query ) .resultType( ExpressionExperiment.class ) @@ -151,7 +147,7 @@ public static SearchSettings expressionExperimentSearch( String query, Taxon tax * @param taxon the taxon to limit the search to (can be null) * @return search settings */ - public static SearchSettings geneSearch( String query, Taxon taxon ) { + public static SearchSettings geneSearch( 
String query, @Nullable Taxon taxon ) { return builder().query( query ).resultType( Gene.class ).taxon( taxon ).build(); } @@ -210,87 +206,44 @@ public static SearchSettings geneSearch( String query, Taxon taxon ) { private transient Highlighter highlighter; /** - * Get this query, trimmed. - */ - public String getQuery() { - return query == null ? null : query.trim(); - } - - /** - * Get the original query that was set by {@link #setQuery(String)}, untrimmed. - */ - @SuppressWarnings("unused") - public String getRawQuery() { - return this.query; - } - - /** - * Indicate if the query refers to an ontology term. - *

- * This is done by checking if this query starts with 'http://' for now, but there could be fancier checks performed - * in the future. - */ - public boolean isTermQuery() { - return getQuery() != null && getQuery().startsWith( "http://" ); - } - - /** - * Obtain the term URI. - * - * @deprecated use {@link #getQuery()} and {@link #isTermQuery()} instead. - * - * @return the term URI if this is a term query, otherwise null - */ - @Deprecated - public String getTermUri() { - return isTermQuery() ? getQuery() : null; - } - - /** - * Set this term URI. - * - * @deprecated URI can be set with {@link #setQuery(String)} instead. - * - * @param termUri a valid term URI, or null or a blank string - */ - @Deprecated - public void setTermUri( String termUri ) { - if ( StringUtils.isNotBlank( termUri ) && !termUri.startsWith( "http://" ) ) { - throw new IllegalArgumentException( "The term URI must be a valid URI." ); - } - setQuery( termUri ); - } - - /** - * Check if the query is a wildcard query. + * Check if this is configured to search a given result type. */ - public boolean isWildcard() { - return query.contains( String.valueOf( WILDCARD_CHAR ) ) || query.contains( String.valueOf( SINGLE_WILDCARD_CHAR ) ); + public boolean hasResultType( Class cls ) { + return resultTypes.contains( cls ); } /** - * Check if this is configured to search a given result type. + * Highlight a given field. */ - public boolean hasResultType( Class cls ) { - return resultTypes.contains( cls ); + @Nullable + public Map highlight( String value, String field ) { + return highlighter != null ? highlighter.highlight( value, field ) : null; } /** * Highlight a given ontology term. *

- * This is a shorthand for {@link #getHighlighter()} and {@link Highlighter#highlightTerm(String, String, String)} + * This is a shorthand for {@link #getHighlighter()} and {@link OntologyHighlighter#highlightTerm(String, String, String)} * that deals with a potentially null highlighter. * @see #setHighlighter(Highlighter) * @return a highlight, or null if no provider is set or the provider returns null */ @Nullable public Map highlightTerm( String termUri, String termLabel, String field ) { - return highlighter != null ? highlighter.highlightTerm( termUri, termLabel, field ) : null; + if ( highlighter instanceof OntologyHighlighter ) { + return ( ( OntologyHighlighter ) highlighter ).highlightTerm( termUri, termLabel, field ); + } else { + return null; + } } @Nullable - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter luceneHighlighter, Analyzer analyzer, Set fields ) { - return highlighter != null ? highlighter.highlightDocument( document, luceneHighlighter, analyzer, fields ) : null; + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter luceneHighlighter, Analyzer analyzer ) { + if ( highlighter instanceof LuceneHighlighter ) { + return ( ( LuceneHighlighter ) highlighter ).highlightDocument( document, luceneHighlighter, analyzer ); + } else { + return null; + } } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettingsValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettingsValueObject.java index 9a61837e2e..8d59dcc89a 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettingsValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettingsValueObject.java @@ -18,21 +18,10 @@ */ package ubic.gemma.model.common.search; -import org.apache.commons.lang3.StringUtils; -import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; -import 
ubic.gemma.model.association.phenotype.PhenotypeAssociation; -import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; -import ubic.gemma.model.expression.designElement.CompositeSequence; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.model.genome.biosequence.BioSequence; -import ubic.gemma.model.genome.gene.GeneSet; import java.io.Serializable; -import java.util.HashSet; -import java.util.Set; /** * author: anton date: 18/03/13 diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/search/package-info.java b/gemma-core/src/main/java/ubic/gemma/model/common/search/package-info.java new file mode 100644 index 0000000000..4837e61b59 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/common/search/package-info.java @@ -0,0 +1,7 @@ +/** + * + */ +@ParametersAreNonnullByDefault +package ubic.gemma.model.common.search; + +import javax.annotation.ParametersAreNonnullByDefault; \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/DoubleVectorValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/DoubleVectorValueObject.java index 4f92824366..c2f69d097a 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/DoubleVectorValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/DoubleVectorValueObject.java @@ -245,10 +245,6 @@ public boolean isSliced() { */ public double[] standardize() { - /* - * FIXME If the values are all equal, variance == 0 and we get nothing back. So we should fill in zeros instead. - */ - /* * DoubleArrayList constructor does not make a copy, so we have to make one. 
*/ @@ -257,7 +253,6 @@ public double[] standardize() { DescriptiveWithMissing.standardize( new DoubleArrayList( copy ) ); return copy; - } /** diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/BioAssaySet.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/BioAssaySet.java index 9ca767d21c..d0b7b601e0 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/BioAssaySet.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/BioAssaySet.java @@ -24,7 +24,6 @@ import ubic.gemma.model.expression.bioAssay.BioAssay; import javax.annotation.Nullable; -import java.util.Collection; import java.util.HashSet; import java.util.Set; diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java index e8a6891298..d20654d471 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java @@ -16,13 +16,13 @@ import gemma.gsec.model.SecuredNotChild; import lombok.extern.apachecommons.CommonsLog; -import org.hibernate.Hibernate; import org.hibernate.proxy.HibernateProxyHelper; import org.hibernate.search.annotations.*; import ubic.gemma.model.common.auditAndSecurity.curation.Curatable; import ubic.gemma.model.common.auditAndSecurity.curation.CurationDetails; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation; @@ -130,7 +130,7 @@ public Long getId() { } @Override - @Field + @Field(store = Store.YES) public String 
getName() { return super.getName(); } @@ -147,6 +147,13 @@ public Set getBioAssays() { return super.getBioAssays(); } + @Nullable + @Override + @IndexedEmbedded + public DatabaseEntry getAccession() { + return super.getAccession(); + } + @Override @IndexedEmbedded public BibliographicReference getPrimaryPublication() { diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java index e0f0c646fb..98318d526b 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java @@ -13,6 +13,7 @@ import org.hibernate.Hibernate; import ubic.gemma.model.annotations.GemmaWebOnly; import ubic.gemma.model.common.auditAndSecurity.curation.AbstractCuratableValueObject; +import ubic.gemma.model.common.description.ExternalDatabases; import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.persistence.util.EntityUtils; @@ -52,6 +53,7 @@ public class ExpressionExperimentValueObject extends AbstractCuratableValueObjec @JsonIgnore private Long experimentalDesign; private String externalDatabase; + private String externalDatabaseUri; private String externalUri; private GeeqValueObject geeq; @JsonIgnore @@ -103,7 +105,10 @@ public ExpressionExperimentValueObject( ExpressionExperiment ee, boolean ignoreD if ( !ignoreAccession && ee.getAccession() != null && Hibernate.isInitialized( ee.getAccession() ) ) { this.accession = ee.getAccession().getAccession(); this.externalDatabase = ee.getAccession().getExternalDatabase().getName(); - this.externalUri = ee.getAccession().getExternalDatabase().getWebUri(); + this.externalDatabaseUri = ee.getAccession().getExternalDatabase().getWebUri(); + if ( ee.getAccession().getExternalDatabase().getName().equals( ExternalDatabases.GEO ) ) { 
+ this.externalUri = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=" + ee.getAccession().getAccession(); + } } // EE @@ -175,7 +180,9 @@ protected ExpressionExperimentValueObject( ExpressionExperimentValueObject vo ) this.accession = vo.getAccession(); this.batchConfound = vo.getBatchConfound(); this.batchEffect = vo.getBatchEffect(); + this.batchEffectStatistics = vo.getBatchEffectStatistics(); this.externalDatabase = vo.getExternalDatabase(); + this.externalDatabaseUri = vo.getExternalDatabaseUri(); this.externalUri = vo.getExternalUri(); this.metadata = vo.getMetadata(); this.shortName = vo.getShortName(); diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/Geeq.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/Geeq.java index 7c9768baa0..8cd024a0ff 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/Geeq.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/Geeq.java @@ -38,14 +38,6 @@ public class Geeq implements Identifiable, Serializable { private static final long serialVersionUID = 4783171234360698630L; private Long id; - - /* - * FIXME: ideally we would get rid of these direct associations as these are events in the experiments audit trail. 
- */ - // private AuditEvent lastRun; -// private AuditEvent lastManualOverride; -// private AuditEvent lastBatchEffectChange; -// private AuditEvent lastBatchConfoundChange; private double detectedQualityScore; private double manualQualityScore; @@ -497,38 +489,6 @@ public void setqScoreSampleCorrelationVariance( double qScoreSampleCorrelationVa this.qScoreSampleCorrelationVariance = qScoreSampleCorrelationVariance; } -// public AuditEvent getLastRun() { -// return lastRun; -// } -// -// public void setLastRun( AuditEvent lastRun ) { -// this.lastRun = lastRun; -// } - -// public AuditEvent getLastManualOverride() { -// return lastManualOverride; -// } -// -// public void setLastManualOverride( AuditEvent lastManualOverride ) { -// this.lastManualOverride = lastManualOverride; -// } -// -// public AuditEvent getLastBatchEffectChange() { -// return lastBatchEffectChange; -// } -// -// public void setLastBatchEffectChange( AuditEvent lastBatchEffectChange ) { -// this.lastBatchEffectChange = lastBatchEffectChange; -// } - -// public AuditEvent getLastBatchConfoundChange() { -// return lastBatchConfoundChange; -// } -// -// public void setLastBatchConfoundChange( AuditEvent lastBatchConfoundChange ) { -// this.lastBatchConfoundChange = lastBatchConfoundChange; -// } - public boolean isNoVectors() { return noVectors; } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/AbstractDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/AbstractDao.java index e14091d257..a0683ee5aa 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/AbstractDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/AbstractDao.java @@ -21,7 +21,6 @@ import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.hibernate.FlushMode; import org.hibernate.Hibernate; import org.hibernate.SessionFactory; import org.hibernate.criterion.Projections; @@ -29,6 
+28,7 @@ import org.hibernate.metadata.ClassMetadata; import org.springframework.util.Assert; import ubic.gemma.model.common.Identifiable; +import ubic.gemma.persistence.util.HibernateUtils; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -38,6 +38,8 @@ import java.util.stream.Collectors; import static java.util.Objects.requireNonNull; +import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * AbstractDao can find the generic type at runtime and simplify the code implementation of the BaseDao interface @@ -51,6 +53,7 @@ public abstract class AbstractDao implements BaseDao protected final Class elementClass; private final SessionFactory sessionFactory; private final ClassMetadata classMetadata; + private final int batchSize; protected AbstractDao( Class elementClass, SessionFactory sessionFactory ) { this( elementClass, sessionFactory, requireNonNull( sessionFactory.getClassMetadata( elementClass ), @@ -67,6 +70,7 @@ protected AbstractDao( Class elementClass, SessionFactory sessionFa this.elementClass = elementClass; this.sessionFactory = sessionFactory; this.classMetadata = classMetadata; + this.batchSize = HibernateUtils.getBatchSize( sessionFactory, classMetadata ); } @Override @@ -124,6 +128,10 @@ public T save( T entity ) { } } + /** + * This implementation is temporary and attempts to best replicate the behaviour of loading entities by multiple IDs + * introduced in Hibernate 5. Read more about this. 
+ */ @Override public Collection load( Collection ids ) { if ( ids.isEmpty() ) { @@ -148,11 +156,19 @@ public Collection load( Collection ids ) { } } - if ( !unloadedIds.isEmpty() ) { + if ( batchSize != -1 && unloadedIds.size() > batchSize ) { + for ( Collection batch : batchParameterList( unloadedIds, batchSize ) ) { + //noinspection unchecked + results.addAll( sessionFactory.getCurrentSession() + .createCriteria( elementClass ) + .add( Restrictions.in( idPropertyName, batch ) ) + .list() ); + } + } else if ( !unloadedIds.isEmpty() ) { //noinspection unchecked results.addAll( sessionFactory.getCurrentSession() .createCriteria( elementClass ) - .add( Restrictions.in( idPropertyName, new HashSet<>( unloadedIds ) ) ) + .add( Restrictions.in( idPropertyName, optimizeParameterList( unloadedIds ) ) ) .list() ); } @@ -278,6 +294,10 @@ protected final SessionFactory getSessionFactory() { return sessionFactory; } + protected final int getBatchSize() { + return batchSize; + } + /** * Retrieve one entity whose given property matches the given value. *

diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java index e940364a1d..c0b944df2d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java @@ -34,6 +34,11 @@ public interface TableMaintenanceUtil { */ String GENE2CS_QUERY_SPACE = "GENE2CS"; + /** + * Recommended batch size to use when retrieving entries from the GENE2CS table either by gene or design element. + */ + int GENE2CS_BATCH_SIZE = 2048; + /** * Query space used by the {@code EXPRESSION_EXPERIMENT2CHARACTERISTIC} table. *

@@ -63,7 +68,7 @@ public interface TableMaintenanceUtil { * @return the number of records that were created or updated */ @Secured({ "GROUP_AGENT" }) - int updateExpressionExperiment2CharacteristicEntries(); + int updateExpressionExperiment2CharacteristicEntries( boolean truncate ); /** * Update a specific level of the {@code EXPRESSION_EXPERIMENT2CHARACTERISTIC} table. @@ -72,7 +77,7 @@ public interface TableMaintenanceUtil { * @return the number of records that were created or updated */ @Secured({ "GROUP_AGENT" }) - int updateExpressionExperiment2CharacteristicEntries( Class level ); + int updateExpressionExperiment2CharacteristicEntries( Class level, boolean truncate ); /** * Update the {@code EXPRESSION_EXPERIMENT2_ARRAY_DESIGN} table. diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java index b8f0fe281a..c722704922 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java @@ -21,13 +21,11 @@ import io.micrometer.core.annotation.Timed; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.hibernate.SessionFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; -import org.springframework.mail.SimpleMailMessage; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import ubic.gemma.model.common.Auditable; @@ -63,7 +61,7 @@ * @author jsantos * @author paul */ -@Service +@Service("tableMaintenanceUtil") public class TableMaintenanceUtilImpl implements TableMaintenanceUtil { /** @@ -91,7 +89,7 @@ public class TableMaintenanceUtilImpl implements 
TableMaintenanceUtil { + "group by AOI.ID), 0)"; private static final String EE2C_EE_QUERY = - "select MIN(C.ID), C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), cast(? as char(256)) " + "select MIN(C.ID), C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), cast(? as char(255)) " + "from INVESTIGATION I " + "join CHARACTERISTIC C on I.ID = C.INVESTIGATION_FK " + "where I.class = 'ExpressionExperiment' " @@ -148,9 +146,6 @@ public class TableMaintenanceUtilImpl implements TableMaintenanceUtil { @Value("${gemma.gene2cs.path}") private Path gene2CsInfoPath; - @Value("${gemma.admin.email}") - private String adminEmailAddress; - private boolean sendEmail = true; @Override @@ -223,8 +218,14 @@ public void updateGene2CsEntries() { @Override @Transactional @Timed - public int updateExpressionExperiment2CharacteristicEntries() { + public int updateExpressionExperiment2CharacteristicEntries( boolean truncate ) { log.info( "Updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table..." ); + if ( truncate ) { + log.info( "Truncating EXPRESSION_EXPERIMENT2CHARACTERISTIC..." 
); + sessionFactory.getCurrentSession() + .createSQLQuery( "delete from EXPRESSION_EXPERIMENT2CHARACTERISTIC" ) + .executeUpdate(); + } int updated = sessionFactory.getCurrentSession() .createSQLQuery( "insert into EXPRESSION_EXPERIMENT2CHARACTERISTIC (ID, NAME, DESCRIPTION, CATEGORY, CATEGORY_URI, `VALUE`, VALUE_URI, ORIGINAL_VALUE, EVIDENCE_CODE, EXPRESSION_EXPERIMENT_FK, ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK, LEVEL) " @@ -246,7 +247,7 @@ public int updateExpressionExperiment2CharacteristicEntries() { @Override @Timed @Transactional - public int updateExpressionExperiment2CharacteristicEntries( Class level ) { + public int updateExpressionExperiment2CharacteristicEntries( Class level, boolean truncate ) { String query; if ( level.equals( ExpressionExperiment.class ) ) { query = EE2C_EE_QUERY; @@ -257,7 +258,14 @@ public int updateExpressionExperiment2CharacteristicEntries( Class level ) { } else { throw new IllegalArgumentException( "Level must be one of ExpressionExperiment.class, BioMaterial.class or ExperimentalDesign.class." ); } - log.info( "Updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table at level " + level + "..." ); + log.info( "Updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table at " + level.getSimpleName() + " level..." ); + if ( truncate ) { + log.info( "Truncating EXPRESSION_EXPERIMENT2CHARACTERISTIC at " + level.getSimpleName() + " level..." 
); + sessionFactory.getCurrentSession() + .createSQLQuery( "delete from EXPRESSION_EXPERIMENT2CHARACTERISTIC where LEVEL = :level" ) + .setParameter( "level", level ) + .executeUpdate(); + } int updated = sessionFactory.getCurrentSession() .createSQLQuery( "insert into EXPRESSION_EXPERIMENT2CHARACTERISTIC (ID, NAME, DESCRIPTION, CATEGORY, CATEGORY_URI, `VALUE`, VALUE_URI, ORIGINAL_VALUE, EVIDENCE_CODE, EXPRESSION_EXPERIMENT_FK, ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK, LEVEL) " @@ -324,17 +332,7 @@ private Gene2CsStatus getLastGene2CsUpdateStatus() { private void sendEmail( Gene2CsStatus results ) { if ( !sendEmail ) return; - SimpleMailMessage msg = new SimpleMailMessage(); - if ( StringUtils.isBlank( adminEmailAddress ) ) { - TableMaintenanceUtilImpl.log - .warn( "No administrator email address could be found, so gene2cs status email will not be sent." ); - return; - } - msg.setTo( adminEmailAddress ); - msg.setSubject( "Gene2Cs update status." ); - msg.setText( "Gene2Cs updating was run.\n" + results.getAnnotation() ); - mailEngine.send( msg ); - TableMaintenanceUtilImpl.log.info( "Email notification sent to " + adminEmailAddress ); + mailEngine.sendAdminMessage( "Gene2Cs update status.", "Gene2Cs updating was run.\n" + results.getAnnotation() ); } /** diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java index 58fc0e3c0c..66d1f6b3b3 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java @@ -20,14 +20,16 @@ import org.apache.commons.lang3.time.StopWatch; import org.hibernate.Hibernate; -import org.hibernate.LockOptions; import org.hibernate.Query; import org.hibernate.SessionFactory; import 
org.hibernate.criterion.Restrictions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; -import ubic.gemma.model.expression.experiment.*; +import ubic.gemma.model.expression.experiment.BioAssaySet; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; +import ubic.gemma.model.expression.experiment.ExpressionExperimentSetValueObject; import ubic.gemma.persistence.service.AbstractDao; import ubic.gemma.persistence.service.AbstractVoEnabledDao; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentDao; @@ -36,6 +38,8 @@ import javax.annotation.Nullable; import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author paul */ @@ -147,7 +151,7 @@ private void populateAnalysisInformation( Collection withCoexp = this.getSessionFactory().getCurrentSession().createQuery( "select e.id, count(an) from ExpressionExperimentSet e, CoexpressionAnalysis an join e.experiments ea " + "where an.experimentAnalyzed = ea and e.id in (:ids) group by e.id" ) - .setParameterList( "ids", idMap.keySet() ).list(); + .setParameterList( "ids", optimizeParameterList( idMap.keySet() ) ).list(); for ( Object[] oa : withCoexp ) { Long id = ( Long ) oa[0]; @@ -164,7 +168,7 @@ private void populateAnalysisInformation( Collection ids ) { Query queryObject = this.getSessionFactory().getCurrentSession().createQuery( queryString ); if ( ids != null ) - queryObject.setParameterList( "ids", ids ); + queryObject.setParameterList( "ids", optimizeParameterList( ids ) ); return queryObject; } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/coexpression/CoexpressionAnalysisDaoImpl.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/coexpression/CoexpressionAnalysisDaoImpl.java index 0053ad878a..98fc4cfee6 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/coexpression/CoexpressionAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/coexpression/CoexpressionAnalysisDaoImpl.java @@ -28,6 +28,8 @@ import java.util.Collection; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** *

* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -59,7 +61,7 @@ public Collection getExperimentsWithAnalysis( Collection idsToFilter //noinspection unchecked return this.getSessionFactory().getCurrentSession().createQuery( "select experimentAnalyzed.id from CoexpressionAnalysis where experimentAnalyzed.id in (:ids)" ) - .setParameterList( "ids", idsToFilter ).list(); + .setParameterList( "ids", optimizeParameterList( idsToFilter ) ).list(); } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java index aa49bfdc75..c5fe9b3d0f 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java @@ -21,13 +21,17 @@ import org.apache.commons.collections4.ListUtils; import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.lang3.tuple.Pair; -import org.hibernate.*; +import org.hibernate.Hibernate; +import org.hibernate.HibernateException; +import org.hibernate.Session; +import org.hibernate.SessionFactory; import org.hibernate.engine.jdbc.spi.SqlStatementLogger; import org.hibernate.engine.spi.SessionImplementor; import org.hibernate.id.IdentifierGeneratorHelper; import org.hibernate.internal.SessionFactoryImpl; import org.hibernate.jdbc.Expectations; import org.hibernate.persister.entity.EntityPersister; +import org.hibernate.type.StandardBasicTypes; import org.hibernate.type.Type; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; @@ -40,9 +44,9 @@ import ubic.gemma.persistence.service.analysis.SingleExperimentAnalysisDaoBase; import 
ubic.gemma.persistence.util.CommonQueries; import ubic.gemma.persistence.util.EntityUtils; +import ubic.gemma.persistence.util.HibernateUtils; import java.io.Serializable; -import java.math.BigInteger; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; @@ -50,6 +54,8 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.*; + /** * @author paul * @see DifferentialExpressionAnalysis @@ -71,6 +77,8 @@ class DifferentialExpressionAnalysisDaoImpl extends SingleExperimentAnalysisDaoB private final EntityPersister resultPersister, contrastPersister; + private final int bioAssaySetBatchSize; + @Autowired public DifferentialExpressionAnalysisDaoImpl( SessionFactory sessionFactory ) { super( DifferentialExpressionAnalysis.class, sessionFactory ); @@ -78,6 +86,7 @@ public DifferentialExpressionAnalysisDaoImpl( SessionFactory sessionFactory ) { .getEntityPersister( DifferentialExpressionAnalysisResult.class.getName() ); contrastPersister = ( ( SessionFactoryImpl ) sessionFactory ) .getEntityPersister( ContrastResult.class.getName() ); + bioAssaySetBatchSize = HibernateUtils.getBatchSize( sessionFactory, sessionFactory.getClassMetadata( BioAssaySet.class ) ); } /** @@ -328,7 +337,7 @@ public Collection findByFactors( Collection findByFactors( Collection> findByExperimentIds + " inner join a.experimentAnalyzed e where e.id in (:eeIds)"; List qresult = this.getSessionFactory().getCurrentSession() .createQuery( queryString ) - .setParameterList( "eeIds", experimentIds ) + .setParameterList( "eeIds", optimizeParameterList( experimentIds ) ) .list(); for ( Object o : qresult ) { Object[] oa = ( Object[] ) o; @@ -378,7 +387,7 @@ public Collection findExperimentsWithAnalyses( Gene gene ) { Collection probes = CommonQueries .getCompositeSequences( gene, this.getSessionFactory().getCurrentSession() ); Collection result = new HashSet<>(); - if ( probes.size() == 0 ) { + if ( 
probes.isEmpty() ) { return result; } @@ -388,31 +397,38 @@ public Collection findExperimentsWithAnalyses( Gene gene ) { timer.reset(); timer.start(); - /* - * Note: this query misses ExpressionExperimentSubSets. The native query was implemented because HQL was always - * constructing a constraint on SubSets. See bug 2173. - */ - final String queryToUse = "select e.ID from ANALYSIS a inner join INVESTIGATION e ON a.EXPERIMENT_ANALYZED_FK = e.ID " - + "inner join BIO_ASSAY ba ON ba.EXPRESSION_EXPERIMENT_FK=e.ID " - + " inner join BIO_MATERIAL bm ON bm.ID=ba.SAMPLE_USED_FK inner join TAXON t ON bm.SOURCE_TAXON_FK=t.ID " - + " inner join COMPOSITE_SEQUENCE cs ON ba.ARRAY_DESIGN_USED_FK =cs.ARRAY_DESIGN_FK where cs.ID in " - + " (:probes) and t.ID = :taxon"; + // Note: this query misses ExpressionExperimentSubSets. The native query was implemented because HQL was always + // constructing a constraint on SubSets. See bug 2173. + // final String queryToUse = "select e.ID from ANALYSIS a inner join INVESTIGATION e ON a.EXPERIMENT_ANALYZED_FK = e.ID " + // + "inner join BIO_ASSAY ba ON ba.EXPRESSION_EXPERIMENT_FK=e.ID " + // + " inner join BIO_MATERIAL bm ON bm.ID=ba.SAMPLE_USED_FK inner join TAXON t ON bm.SOURCE_TAXON_FK=t.ID " + // + " inner join COMPOSITE_SEQUENCE cs ON ba.ARRAY_DESIGN_USED_FK =cs.ARRAY_DESIGN_FK where cs.ID in " + // + " (:probes) and t.ID = :taxon"; Taxon taxon = gene.getTaxon(); - int batchSize = 1000; - Collection batch = new HashSet<>(); - for ( CompositeSequence probe : probes ) { - batch.add( probe ); - - if ( batch.size() == batchSize ) { - this.fetchExperimentsTestingGeneNativeQuery( batch, result, queryToUse, taxon ); - batch.clear(); - } + Set ids = new HashSet<>(); + for ( Collection batch : batchParameterList( EntityUtils.getIds( probes ), 1024 ) ) { + //noinspection unchecked + ids.addAll( this.getSessionFactory().getCurrentSession() + .createSQLQuery( "select a.EXPERIMENT_ANALYZED_FK from ANALYSIS a " + + "join BIO_ASSAY ba ON 
ba.EXPRESSION_EXPERIMENT_FK = a.EXPERIMENT_ANALYZED_FK " + + "join BIO_MATERIAL bm ON bm.ID = ba.SAMPLE_USED_FK " + + "join TAXON t ON bm.SOURCE_TAXON_FK = t.ID " + + "join COMPOSITE_SEQUENCE cs ON ba.ARRAY_DESIGN_USED_FK = cs.ARRAY_DESIGN_FK " + + "where cs.ID in (:probes) and t.ID = :taxon" ) + .addScalar( "ID", StandardBasicTypes.LONG ) + .setParameterList( "probes", batch ) + .setParameter( "taxon", taxon ) + .list() ); } - if ( !batch.isEmpty() ) { - this.fetchExperimentsTestingGeneNativeQuery( batch, result, queryToUse, taxon ); + for ( Collection batch : batchParameterList( ids, bioAssaySetBatchSize ) ) { + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession() + .createQuery( "from BioAssaySet ba where ba.id in (:ids)" ) + .setParameterList( "ids", batch ) + .list() ); } if ( timer.getTime() > 1000 ) { @@ -432,12 +448,12 @@ public Map> get final String query = "select distinct a from DifferentialExpressionAnalysis a inner join fetch a.resultSets res " + " inner join fetch res.baselineGroup" + " inner join fetch res.experimentalFactors facs inner join fetch facs.factorValues " - + " inner join fetch res.hitListSizes where a.experimentAnalyzed.id in (:ees) "; + + " inner join fetch res.hitListSizes where a.experimentAnalyzed in (:ees) "; //noinspection unchecked List r1 = this.getSessionFactory().getCurrentSession() .createQuery( query ) - .setParameterList( "ees", EntityUtils.getIds( experiments ) ) + .setParameterList( "ees", optimizeIdentifiableParameterList( experiments ) ) .list(); int count = 0; for ( DifferentialExpressionAnalysis a : r1 ) { @@ -466,11 +482,11 @@ public Map> get + " inner join fetch a.resultSets res inner join fetch res.baselineGroup " + " inner join fetch res.experimentalFactors facs inner join fetch facs.factorValues" + " inner join fetch res.hitListSizes " - + " join eess.sourceExperiment see join a.experimentAnalyzed ee where eess=ee and see.id in (:ees) "; + + " join eess.sourceExperiment see join 
a.experimentAnalyzed ee where eess=ee and see in (:ees) "; //noinspection unchecked List r2 = this.getSessionFactory().getCurrentSession() .createQuery( q2 ) - .setParameterList( "ees", EntityUtils.getIds( experiments ) ) + .setParameterList( "ees", optimizeIdentifiableParameterList( experiments ) ) .list(); if ( !r2.isEmpty() ) { @@ -510,7 +526,7 @@ public Collection getExperimentsWithAnalysis( Collection idsToFilter //noinspection unchecked return this.getSessionFactory().getCurrentSession() .createQuery( queryString ) - .setParameterList( "eeIds", idsToFilter ) + .setParameterList( "eeIds", optimizeParameterList( idsToFilter ) ) .list(); } @@ -548,7 +564,7 @@ public Map> getAnalysesByE "select distinct a from DifferentialExpressionAnalysis a " + "join fetch a.experimentAnalyzed e " + "where e.id in (:eeIds)" ) - .setParameterList( "eeIds", expressionExperimentIds ) + .setParameterList( "eeIds", optimizeParameterList( expressionExperimentIds ) ) .setFirstResult( offset ) .setMaxResults( limit ) .list(); @@ -571,7 +587,7 @@ public Map> getAnalysesByE fvs = this.getSessionFactory().getCurrentSession().createQuery( "select distinct ee.id, fv from " + "ExpressionExperiment" + " ee join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv where ee.id in (:ees)" ) - .setParameterList( "ees", expressionExperimentIds ).list(); + .setParameterList( "ees", optimizeParameterList( expressionExperimentIds ) ).list(); this.addFactorValues( ee2fv, fvs ); // also get factor values for subsets - those not found yet. 
@@ -580,13 +596,13 @@ public Map> getAnalysesByE used.add( a.getExperimentAnalyzed().getId() ); } - List probableSubSetIds = ListUtils.removeAll( used, ee2fv.keySet() ); + List probableSubSetIds = ListUtils.removeAll( used, ee2fv.keySet() ); if ( !probableSubSetIds.isEmpty() ) { //noinspection unchecked fvs = this.getSessionFactory().getCurrentSession().createQuery( "select distinct ee.id, fv from " + "ExpressionExperimentSubSet" + " ee join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv where ee.id in (:ees)" ) - .setParameterList( "ees", probableSubSetIds ).list(); + .setParameterList( "ees", optimizeParameterList( probableSubSetIds ) ).list(); this.addFactorValues( ee2fv, fvs ); } @@ -600,7 +616,7 @@ public Map> getAnalysesByE .createQuery( "select distinct a from " + "ExpressionExperimentSubSet" + " ee, DifferentialExpressionAnalysis a" + " join ee.sourceExperiment see " + " join fetch a.experimentAnalyzed eeanalyzed where see.id in (:eeids) and ee=eeanalyzed" ) - .setParameterList( "eeids", expressionExperimentIds ).list(); + .setParameterList( "eeids", optimizeParameterList( expressionExperimentIds ) ).list(); if ( !analysesOfSubsets.isEmpty() ) { hits.addAll( analysesOfSubsets ); @@ -617,7 +633,7 @@ public Map> getAnalysesByE fvs = this.getSessionFactory().getCurrentSession().createQuery( "select distinct ee.id, fv from " + "ExpressionExperimentSubSet" + " ee join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv where ee.id in (:ees)" ) - .setParameterList( "ees", experimentSubsetIds ).list(); + .setParameterList( "ees", optimizeParameterList( experimentSubsetIds ) ).list(); this.addFactorValues( ee2fv, fvs ); } @@ -705,7 +721,7 @@ public Map> findByExperi results.addAll( this.getSessionFactory().getCurrentSession().createQuery( "select distinct a from DifferentialExpressionAnalysis a " + "where a.experimentAnalyzed in :ees" ) - .setParameterList( "ees", experiments ).list() ); + .setParameterList( "ees", 
optimizeIdentifiableParameterList( experiments ) ).list() ); /* * Deal with the analyses of subsets of the investigation. User has to know this is possible. @@ -715,7 +731,7 @@ public Map> findByExperi "select distinct a from ExpressionExperimentSubSet eess, DifferentialExpressionAnalysis a " + "join eess.sourceExperiment see " + "join a.experimentAnalyzed eeanalyzed where see in :ees and eess=eeanalyzed" ) - .setParameterList( "ees", experiments ).list() ); + .setParameterList( "ees", optimizeIdentifiableParameterList( experiments ) ).list() ); return results.stream() .collect( Collectors.groupingBy( DifferentialExpressionAnalysis::getExperimentAnalyzed, Collectors.toCollection( ArrayList::new ) ) ); @@ -778,29 +794,6 @@ private Collection convertToValueObje return summaries; } - private void fetchExperimentsTestingGeneNativeQuery( Collection probes, - Collection result, final String nativeQuery, Taxon taxon ) { - - if ( probes.isEmpty() ) - return; - - SQLQuery nativeQ = this.getSessionFactory().getCurrentSession().createSQLQuery( nativeQuery ); - nativeQ.setParameterList( "probes", EntityUtils.getIds( probes ) ); - nativeQ.setParameter( "taxon", taxon ); - List list = nativeQ.list(); - Set ids = new HashSet<>(); - for ( Object o : list ) { - ids.add( ( ( BigInteger ) o ).longValue() ); - } - if ( !ids.isEmpty() ) { - //noinspection unchecked - result.addAll( this.getSessionFactory().getCurrentSession() - .createQuery( "from ExpressionExperiment e where e.id in (:ids)" ) - .setParameterList( "ids", ids ) - .list() ); - } - } - /** * Figure out which factorValues were used for each of the experimental factors (excluding the subset factor) */ diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java index 209562d31f..f6c6ba7fd5 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java @@ -26,7 +26,6 @@ import org.springframework.util.Assert; import ubic.basecode.io.ByteArrayConverter; import ubic.basecode.math.distribution.Histogram; -import ubic.basecode.util.BatchIterator; import ubic.basecode.util.SQLUtils; import ubic.gemma.model.analysis.expression.diff.*; import ubic.gemma.model.expression.experiment.BioAssaySet; @@ -43,6 +42,9 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * This is a key class for queries to retrieve differential expression results (as well as standard CRUD aspects of * working with DifferentialExpressionResults). @@ -95,7 +97,7 @@ public Map 0 ? " order by r.correctedPvalue" : "" ) ) .setParameter( "gene", gene ) - .setParameterList( "experimentsAnalyzed", experimentsAnalyzed ) + .setParameterList( "experimentsAnalyzed", optimizeParameterList( experimentsAnalyzed ) ) .setParameter( "threshold", threshold ) .setMaxResults( limit ) .setCacheable( true ) @@ -127,7 +129,7 @@ public Map 0 ? 
" order by r.correctedPvalue" : "" ) ) - .setParameterList( "experimentsAnalyzed", experiments ) + .setParameterList( "experimentsAnalyzed", optimizeParameterList( experiments ) ) .setParameter( "threshold", qvalueThreshold ) .setMaxResults( limit ) .setCacheable( true ) @@ -173,7 +175,7 @@ public Map qResult = this.getSessionFactory().getCurrentSession() .createQuery( DIFF_EX_RESULTS_BY_GENE_QUERY + " and e.id in (:experimentsAnalyzed)" ) .setParameter( "gene", gene ) - .setParameterList( "experimentsAnalyzed", experimentsAnalyzed ) + .setParameterList( "experimentsAnalyzed", optimizeParameterList( experimentsAnalyzed ) ) .list(); try { return groupDiffExResultVos( qResult ); @@ -269,7 +271,7 @@ public Map> findDiffExAnalysisResultId int numResultSetBatchesDone = 0; // Iterate over batches of resultSets - for ( Collection resultSetIdBatch : new BatchIterator<>( resultSetsNeeded, resultSetBatchSize ) ) { + for ( Collection resultSetIdBatch : batchParameterList( resultSetsNeeded, resultSetBatchSize ) ) { if ( AbstractDao.log.isDebugEnabled() ) AbstractDao.log.debug( "Starting batch of resultsets: " + StringUtils @@ -290,17 +292,13 @@ public Map> findDiffExAnalysisResultId StopWatch innerQt = new StopWatch(); // iterate over batches of probes (genes) - for ( Collection probeBatch : new BatchIterator<>( cs2GeneIdMap.keySet(), geneBatchSize ) ) { + for ( Collection probeBatch : batchParameterList( cs2GeneIdMap.keySet(), geneBatchSize ) ) { if ( AbstractDao.log.isDebugEnabled() ) AbstractDao.log.debug( "Starting batch of probes: " + StringUtils .abbreviate( StringUtils.join( probeBatch, "," ), 100 ) ); - // would it help to sort the probeBatch/ - List pbL = new Vector<>( probeBatch ); - Collections.sort( pbL ); - - queryObject.setParameterList( "probe_ids", pbL ); + queryObject.setParameterList( "probe_ids", probeBatch ); innerQt.start(); List queryResult = queryObject.list(); @@ -341,7 +339,7 @@ public Map> findDiffExAnalysisResultId if ( 
DifferentialExpressionResultDaoImpl.CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX < 1.0 ) { timeForFillingNonSig += this - .fillNonSignificant( pbL, resultSetIdsMap, resultsFromDb, resultSetIdBatch, cs2GeneIdMap, + .fillNonSignificant( probeBatch, resultSetIdsMap, resultsFromDb, resultSetIdBatch, cs2GeneIdMap, session ); } } // over probes. @@ -458,7 +456,7 @@ public Map loadContrastDetailsForResults( Collection int BATCH_SIZE = 2000; // previously: 500, then 1000. New optimized query is plenty fast. StopWatch timer = new StopWatch(); - for ( Collection batch : new BatchIterator<>( ids, BATCH_SIZE ) ) { + for ( Collection batch : batchParameterList( ids, BATCH_SIZE ) ) { timer.reset(); timer.start(); @@ -572,7 +570,7 @@ public Map ) session .createQuery( "select id,name from CompositeSequence where id in (:ids)" ) - .setParameterList( "ids", probeIds ).list() ) { + .setParameterList( "ids", optimizeParameterList( probeIds ) ).list() ) { probeNames.put( ( Long ) rec[0], ( String ) rec[1] ); } } @@ -588,7 +586,7 @@ public Map> fillFromCache( Map pbL, Map resultSetIds, + private long fillNonSignificant( Collection pbL, Map resultSetIds, Map> resultsFromDb, Collection resultSetIdBatch, Map> cs2GeneIdMap, Session session ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/GeneDiffExMetaAnalysisDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/GeneDiffExMetaAnalysisDaoImpl.java index 939bdeba56..453919b6fe 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/GeneDiffExMetaAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/GeneDiffExMetaAnalysisDaoImpl.java @@ -33,6 +33,8 @@ import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author Paul */ @@ -88,7 +90,7 @@ public Collection find //noinspection unchecked List queryResults = 
this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "aIds", metaAnalysisIds ).list(); + .setParameterList( "aIds", optimizeParameterList( metaAnalysisIds ) ).list(); for ( Object[] queryResult : queryResults ) { GeneDifferentialExpressionMetaAnalysisSummaryValueObject myMetaAnalysis = new GeneDifferentialExpressionMetaAnalysisSummaryValueObject(); @@ -140,7 +142,7 @@ public Collection getExperimentsWithAnalysis( Collection idsToFilter return this.getSessionFactory().getCurrentSession().createQuery( "select distinct a from GeneDifferentialExpressionMetaAnalysis a" + " inner join a.resultSetsIncluded rs inner join rs.analysis ra where ra.experimentAnalyzed.id in (:ids)" ) - .setParameterList( "ids", idsToFilter ).list(); + .setParameterList( "ids", optimizeParameterList( idsToFilter ) ).list(); } /** diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/Gene2GOAssociationDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/Gene2GOAssociationDaoImpl.java index b7f1d97786..53134f666a 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/Gene2GOAssociationDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/Gene2GOAssociationDaoImpl.java @@ -20,7 +20,6 @@ import org.apache.commons.lang3.time.StopWatch; import org.hibernate.Criteria; -import org.hibernate.Query; import org.hibernate.SessionFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; @@ -31,10 +30,14 @@ import ubic.gemma.persistence.service.AbstractDao; import ubic.gemma.persistence.util.BusinessKey; import ubic.gemma.persistence.util.EntityUtils; +import ubic.gemma.persistence.util.HibernateUtils; import javax.annotation.Nullable; import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.batchIdentifiableParameterList; +import static 
ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author pavlidis * @see ubic.gemma.model.association.Gene2GOAssociation @@ -42,9 +45,12 @@ @Repository public class Gene2GOAssociationDaoImpl extends AbstractDao implements Gene2GOAssociationDao { + private final int geneBatchSize; + @Autowired protected Gene2GOAssociationDaoImpl( SessionFactory sessionFactory ) { super( Gene2GOAssociation.class, sessionFactory ); + this.geneBatchSize = HibernateUtils.getBatchSize( sessionFactory, sessionFactory.getClassMetadata( Gene.class ) ); } @Override @@ -79,22 +85,25 @@ public Map> findByGenes( Collection needT Map> result = new HashMap<>(); StopWatch timer = new StopWatch(); timer.start(); - int batchSize = 200; - Set batch = new HashSet<>(); int i = 0; - for ( Gene gene : needToFind ) { - batch.add( gene ); - if ( batch.size() == batchSize ) { - result.putAll( this.fetchBatch( batch ) ); - batch.clear(); + for ( Collection batch : batchIdentifiableParameterList( needToFind, geneBatchSize ) ) { + Map giMap = EntityUtils.getIdMap( batch ); + //noinspection unchecked + List o = this.getSessionFactory().getCurrentSession() + .createQuery( "select g.id, geneAss.ontologyEntry from Gene2GOAssociation as geneAss join geneAss.gene g where g.id in (:genes)" ) + .setParameterList( "genes", giMap.keySet() ) + .list(); + for ( Object[] object : o ) { + Long g = ( Long ) object[0]; + Characteristic vc = ( Characteristic ) object[1]; + Gene gene = giMap.get( g ); + assert gene != null; + result.computeIfAbsent( gene, k -> new HashSet<>() ).add( vc ); } if ( ++i % 1000 == 0 ) { AbstractDao.log.info( "Fetched GO associations for " + i + "/" + needToFind.size() + " genes" ); } } - if ( !batch.isEmpty() ) - result.putAll( this.fetchBatch( batch ) ); - if ( timer.getTime() > 1000 ) { AbstractDao.log .info( "Fetched GO annotations for " + needToFind.size() + " genes in " + timer.getTime() + " ms" ); @@ -131,7 +140,7 @@ public Collection getGenes( Collection ids ) { return 
this.getSessionFactory().getCurrentSession().createQuery( "select distinct geneAss.gene from Gene2GOAssociation as geneAss " + "where geneAss.ontologyEntry.value in ( :goIDs)" ) - .setParameterList( "goIDs", ids ).list(); + .setParameterList( "goIDs", optimizeParameterList( ids ) ).list(); } @Override @@ -141,9 +150,11 @@ public Collection getGenes( Collection ids, @Nullable Taxon taxon //noinspection unchecked return this.getSessionFactory().getCurrentSession().createQuery( - "select distinct " + " gene from Gene2GOAssociation as geneAss join geneAss.gene as gene " + "select distinct gene from Gene2GOAssociation as geneAss join geneAss.gene as gene " + "where geneAss.ontologyEntry.value in ( :goIDs) and gene.taxon = :tax" ) - .setParameterList( "goIDs", ids ).setParameter( "tax", taxon ).list(); + .setParameterList( "goIDs", optimizeParameterList( ids ) ) + .setParameter( "tax", taxon ) + .list(); } @Override @@ -159,7 +170,7 @@ public int removeAll() { if ( !cIds.isEmpty() ) { removedCharacteristics = getSessionFactory().getCurrentSession() .createQuery( "delete from Characteristic where id in :cIds" ) - .setParameterList( "cIds", cIds ) + .setParameterList( "cIds", optimizeParameterList( cIds ) ) .executeUpdate(); } else { removedCharacteristics = 0; @@ -168,30 +179,4 @@ public int removeAll() { removedAssociations, removedCharacteristics ) ); return removedAssociations; } - - private Map> fetchBatch( Set batch ) { - Map giMap = EntityUtils.getIdMap( batch ); - //language=HQL - final String queryString = "select g.id, geneAss.ontologyEntry from Gene2GOAssociation as geneAss join geneAss.gene g where g.id in (:genes)"; - Map> results = new HashMap<>(); - Query query = this.getSessionFactory().getCurrentSession().createQuery( queryString ); - query.setFetchSize( batch.size() ); - query.setParameterList( "genes", giMap.keySet() ); - List o = query.list(); - - for ( Object object : o ) { - Object[] oa = ( Object[] ) object; - Long g = ( Long ) oa[0]; - 
Characteristic vc = ( Characteristic ) oa[1]; - Gene gene = giMap.get( g ); - assert gene != null; - if ( !results.containsKey( gene ) ) { - results.put( gene, new HashSet() ); - } - results.get( gene ).add( vc ); - } - - return results; - } - } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditEventDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditEventDaoImpl.java index 6bbe63666c..df617b100b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditEventDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditEventDaoImpl.java @@ -37,6 +37,8 @@ import java.util.function.Function; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author pavlidis * @see ubic.gemma.model.common.auditAndSecurity.AuditEvent @@ -107,9 +109,14 @@ public Map, Map> getLastE public Collection getNewSinceDate( Date date ) { Collection result = new HashSet<>(); for ( String clazz : AuditEventDaoImpl.AUDITABLES_TO_TRACK_FOR_WHATS_NEW ) { - String queryString = "select distinct adb from " + clazz - + " adb inner join adb.auditTrail atr inner join atr.events as ae where ae.date > :date and ae.action='C'"; - this.tryAddAllToResult( result, queryString, date ); + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession() + .createQuery( "select adb from " + clazz + " adb " + + "join adb.auditTrail atr " + + "join atr.events as ae " + + "where ae.date > :date and ae.action='C' " + + "group by adb" ) + .setParameter( "date", date ).list() ); } return result; } @@ -125,9 +132,14 @@ public Collection getNewSinceDate( Date date ) { public Collection getUpdatedSinceDate( Date date ) { Collection result = new HashSet<>(); for ( String clazz : AuditEventDaoImpl.AUDITABLES_TO_TRACK_FOR_WHATS_NEW ) 
{ - String queryString = "select distinct adb from " + clazz - + " adb inner join adb.auditTrail atr inner join atr.events as ae where ae.date > :date and ae.action='U'"; - this.tryAddAllToResult( result, queryString, date ); + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession() + .createQuery( "select adb from " + clazz + " adb " + + "join adb.auditTrail atr " + + "join atr.events as ae " + + "where ae.date > :date and ae.action='U' " + + "group by adb" ) + .setParameter( "date", date ).list() ); } return result; } @@ -174,7 +186,7 @@ public Map getCreateEvents( final Collection qr = queryObject.list(); for ( Object o : qr ) { Object[] ar = ( Object[] ) o; @@ -220,8 +232,8 @@ private Map getLastEvents( final Collection qr = queryObject.list(); for ( Object o : qr ) { @@ -242,13 +254,6 @@ private Map getLastEvents( final Collection result, String queryString, Date date ) { - org.hibernate.Query queryObject = this.getSessionFactory().getCurrentSession().createQuery( queryString ); - queryObject.setParameter( "date", date ); - //noinspection unchecked - result.addAll( queryObject.list() ); - } - /** * Determine the full set of AuditEventTypes that are needed (that is, subclasses of the given class) * diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailDaoImpl.java index 266cf83fa1..35ea164a5b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailDaoImpl.java @@ -27,6 +27,8 @@ import java.util.Collection; import java.util.List; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author pavlidis * @see AuditTrailDao @@ -54,18 +56,18 @@ public int removeByIds( Collection ids ) { if ( 
!aeIds.isEmpty() ) { getSessionFactory().getCurrentSession() .createQuery( "delete from AuditEvent ae where ae.id in :aeIds" ) - .setParameterList( "aeIds", aeIds ) + .setParameterList( "aeIds", optimizeParameterList( aeIds ) ) .executeUpdate(); } if ( !aetIds.isEmpty() ) { getSessionFactory().getCurrentSession() .createQuery( "delete from AuditEventType aet where aet.id in :aetIds" ) - .setParameterList( "aetIds", aetIds ) + .setParameterList( "aetIds", optimizeParameterList( aetIds ) ) .executeUpdate(); } return getSessionFactory().getCurrentSession() .createQuery( "delete from AuditTrail at where at.id in :atIds" ) - .setParameterList( "atIds", ids ) + .setParameterList( "atIds", optimizeParameterList( ids ) ) .executeUpdate(); } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java index d7150d357f..aab6569b8e 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java @@ -2,6 +2,8 @@ import gemma.gsec.util.SecurityUtil; import org.hibernate.SessionFactory; +import org.hibernate.metadata.ClassMetadata; +import org.hibernate.persister.entity.SingleTableEntityPersister; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; import ubic.gemma.model.common.auditAndSecurity.curation.AbstractCuratableValueObject; import ubic.gemma.model.common.auditAndSecurity.curation.Curatable; @@ -40,7 +42,7 @@ protected AbstractCuratableDao( String objectAlias, Class elementClass, Sessi super( objectAlias, elementClass, sessionFactory ); this.objectAlias = objectAlias; } - + @Override public void updateCurationDetailsFromAuditEvent( Curatable curatable, AuditEvent 
auditEvent ) { if ( curatable.getId() == null ) { @@ -85,6 +87,7 @@ protected void addNonTroubledFilter( Filters filters, String objectAlias ) { if ( !SecurityUtil.isUserAdmin() ) { filters.and( objectAlias, "curationDetails.troubled", Boolean.class, Filter.Operator.eq, false ); } + } /** @@ -121,15 +124,33 @@ protected String groupByIfNecessary( @Nullable Sort sort, String... oneToManyAli } /** - * Format a non-troubled filter for an HQL query. - *

- * For filtering queries, use {@link #addNonTroubledFilter(Filters, String)} instead. - * - * @param objectAlias an alias for a {@link Curatable} entity + * Form a non-troubled clause. */ - protected String formNonTroubledClause( String objectAlias ) { - //language=HQL - return SecurityUtil.isUserAdmin() ? "" : " and " + objectAlias + ".curationDetails.troubled = false"; + protected String formNonTroubledClause( String objectAlias, Class clazz ) { + String entityName = getSessionFactory().getClassMetadata( clazz ).getEntityName(); + if ( !SecurityUtil.isUserAdmin() ) { + //language=HQL + return " and " + objectAlias + " not in (select c from " + entityName + " c join c.curationDetails cd where cd.troubled = true)"; + } else { + return ""; + } + } + + /** + * Form a native non-troubled clause. + */ + protected String formNativeNonTroubledClause( String idColumn, Class clazz ) { + ClassMetadata classMetadata = getSessionFactory().getClassMetadata( clazz ); + String table = ( ( SingleTableEntityPersister ) classMetadata ) + .getTableName(); + String columnName = ( ( SingleTableEntityPersister ) classMetadata ) + .getPropertyColumnNames( "curationDetails" )[0]; + if ( !SecurityUtil.isUserAdmin() ) { + //language=SQL + return " and " + idColumn + " not in (select c.ID from " + table + " c join CURATION_DETAILS cd on c." 
+ columnName + " = cd.ID where cd.TROUBLED)"; + } else { + return ""; + } } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/BibliographicReferenceDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/BibliographicReferenceDaoImpl.java index fd27cfb838..b63a264b2b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/BibliographicReferenceDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/BibliographicReferenceDaoImpl.java @@ -19,17 +19,19 @@ import org.hibernate.criterion.Restrictions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import ubic.basecode.util.BatchIterator; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.BibliographicReferenceValueObject; import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.AbstractVoEnabledDao; import ubic.gemma.persistence.util.BusinessKey; -import ubic.gemma.persistence.util.EntityUtils; +import ubic.gemma.persistence.util.HibernateUtils; import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.batchIdentifiableParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** * @author pavlidis * @see BibliographicReference @@ -39,9 +41,12 @@ public class BibliographicReferenceDaoImpl extends AbstractVoEnabledDao implements BibliographicReferenceDao { + private final int eeBatchSize; + @Autowired public BibliographicReferenceDaoImpl( SessionFactory sessionFactory ) { super( BibliographicReference.class, sessionFactory ); + this.eeBatchSize = HibernateUtils.getBatchSize( sessionFactory, sessionFactory.getClassMetadata( ExpressionExperiment.class ) ); } @Override @@ -89,8 +94,8 
@@ public Collection thaw( Collection> getRelatedE Map> result = new HashMap<>(); - for ( Collection batch : BatchIterator.batches( records, 200 ) ) { + for ( Collection batch : batchIdentifiableParameterList( records, eeBatchSize ) ) { //noinspection unchecked List os = this.getSessionFactory().getCurrentSession().createQuery( query ) .setParameterList( "recs", batch ).list(); for ( Object[] o : os ) { ExpressionExperiment e = ( ExpressionExperiment ) o[0]; BibliographicReference b = ( BibliographicReference ) o[1]; - if ( !result.containsKey( b ) ) { - result.put( b, new HashSet() ); - } - result.get( b ).add( e ); + result.computeIfAbsent( b, k -> new HashSet<>() ).add( e ); } } return result; diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java index b1d7b5933c..766a86c848 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java @@ -18,7 +18,6 @@ */ package ubic.gemma.persistence.service.common.description; -import org.apache.commons.collections4.ListUtils; import org.apache.commons.lang3.StringUtils; import org.hibernate.Hibernate; import org.hibernate.Query; @@ -50,6 +49,7 @@ import java.util.stream.Collectors; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2C_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.*; /** * @author Luke @@ -98,10 +98,8 @@ public Map, Map> if ( uris.isEmpty() ) { return Collections.emptyMap(); } - //noinspection unchecked - List result = prepareExperimentsByUrisQuery( uris, taxon, limit > 0 && rankByLevel ) - .setMaxResults( limit ) - .list(); + // no need to rank if there is no limit since we're collecting in a mapping + List result = findExperimentsByUrisInternal( 
uris, taxon, limit > 0 && rankByLevel, limit ); if ( result.isEmpty() ) { return Collections.emptyMap(); } @@ -134,11 +132,7 @@ public Map, Map> return Collections.emptyMap(); } //noinspection unchecked - List result = prepareExperimentsByUrisQuery( uris, taxon, limit > 0 && rankByLevel ) - .setMaxResults( limit ) - .list(); - //noinspection unchecked - return result.stream().collect( Collectors.groupingBy( + return findExperimentsByUrisInternal( uris, taxon, limit > 0 && rankByLevel, limit ).stream().collect( Collectors.groupingBy( row -> ( Class ) row[0], Collectors.groupingBy( row -> ( String ) row[1], @@ -147,7 +141,7 @@ public Map, Map> Collectors.toCollection( () -> new TreeSet<>( Comparator.comparing( ExpressionExperiment::getId ) ) ) ) ) ) ); } - private Query prepareExperimentsByUrisQuery( Collection uris, @Nullable Taxon taxon, boolean rankByLevel ) { + private List findExperimentsByUrisInternal( Collection uris, @Nullable Taxon taxon, boolean rankByLevel, int limit ) { String qs = "select T.`LEVEL`, T.VALUE_URI, T.EXPRESSION_EXPERIMENT_FK from EXPRESSION_EXPERIMENT2CHARACTERISTIC T" + ( taxon != null ? 
" join INVESTIGATION I on T.EXPRESSION_EXPERIMENT_FK = I.ID " : "" ) + EE2CAclQueryUtils.formNativeAclJoinClause( "T.EXPRESSION_EXPERIMENT_FK" ) + " " @@ -173,8 +167,6 @@ private Query prepareExperimentsByUrisQuery( Collection uris, @Nullable query.setParameter( "bmClass", BioMaterial.class ); } - query.setParameterList( "uris", uris ); - if ( taxon != null ) { query.setParameter( "taxonId", taxon.getId() ); } @@ -183,7 +175,38 @@ private Query prepareExperimentsByUrisQuery( Collection uris, @Nullable query.setCacheable( true ); - return query; + List result; + if ( uris.size() > MAX_PARAMETER_LIST_SIZE ) { + if ( limit > 0 && rankByLevel ) { + // query is limited and order is important, we have to sort the results in memory + result = streamByBatch( query, "uris", uris, 2048, Object[].class ) + .sorted( Comparator.comparing( row -> rankClass( ( Class ) row[0] ) ) ) + .limit( limit ) + .collect( Collectors.toList() ); + } else { + // query is either unlimited or there is no ordering, batching will not affect the output + result = listByBatch( query, "uris", uris, 2048, limit ); + } + } else { + //noinspection unchecked + result = query + .setParameterList( "uris", optimizeParameterList( uris ) ) + .list(); + } + + return result; + } + + private int rankClass( Class clazz ) { + if ( clazz == ExpressionExperiment.class ) { + return 0; + } else if ( clazz == ExperimentalDesign.class ) { + return 1; + } else if ( clazz == BioMaterial.class ) { + return 2; + } else { + return 3; + } } @Override @@ -198,7 +221,7 @@ public Collection findByUri( Collection uris ) { .sorted() .collect( Collectors.toList() ); - for ( List batch : ListUtils.partition( uniqueUris, 100 ) ) { + for ( Collection batch : batchParameterList( uniqueUris, getBatchSize() ) ) { //noinspection unchecked results.addAll( this.getSessionFactory().getCurrentSession() .createQuery( "from Characteristic where valueUri in (:uris)" ) @@ -242,7 +265,7 @@ public Map 
countCharacteristicsByValueUriGroupedByNormalizedValue( .createQuery( "select lower(coalesce(char.valueUri, char.value)), count(char) from Characteristic char " + "where char.valueUri in :uris " + "group by coalesce(char.valueUri, char.value)" ) - .setParameterList( "uris", uniqueUris ) + .setParameterList( "uris", optimizeParameterList( uniqueUris ) ) .list() ) .stream() .collect( Collectors.toMap( row -> ( String ) row[0], row -> ( Long ) row[1] ) ); @@ -326,7 +349,7 @@ public Map getParents( Collection + "where C.ID in :ids " + "and (I.class is NULL or I.class = 'ExpressionExperiment') " // for investigations, only retrieve EEs + extraClause ) - .setParameterList( "ids", characteristicIds ) + .setParameterList( "ids", optimizeParameterList( characteristicIds ) ) .setMaxResults( maxResults ) .list(); Set characteristicsNotFound = new HashSet<>(); @@ -377,9 +400,11 @@ public Map getParents( Collection // } if ( efOK ) { + //noinspection unchecked List efResults = getSessionFactory().getCurrentSession() .createQuery( "select ef, ef.category from ExperimentalFactor ef where ef.category in :characteristics" ) - .setParameterList( "characteristics", characteristicsNotFound ).list(); + .setParameterList( "characteristics", optimizeParameterList( characteristicsNotFound ) ) + .list(); for ( Object[] row : efResults ) { charToParent.put( ( Characteristic ) row[1], ( Identifiable ) row[0] ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java index 028f05f9d8..60b24916e9 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java @@ -40,6 +40,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static 
ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** *

* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -180,7 +182,7 @@ private void populateVectorType( Collection quantit List qtIds = getSessionFactory().getCurrentSession() .createQuery( "select distinct v.quantitationType.id from " + vectorType.getName() + " v where v.expressionExperiment = :ee and v.quantitationType.id in :ids" ) .setParameter( "ee", ee ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .list(); qtIds.forEach( id -> vectorTypeById.add( id, vectorType ) ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java index 85a43136cb..5e092eb9d6 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java @@ -52,7 +52,9 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; +import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2AD_QUERY_SPACE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * @author pavlidis @@ -294,7 +296,7 @@ public Map> getAuditEvents( Collection ids ) //noinspection unchecked List list = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).list(); + .setParameterList( "ids", optimizeParameterList( ids ) ).list(); Map> eventMap = new HashMap<>(); for ( Object[] o : list ) { Long id = ( Long ) o[0]; @@ -439,12 +441,11 @@ public Map isMerged( Collection ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } - Set distinctIds = new HashSet<>( ids ); //noinspection unchecked,rawtypes Set mergedIds = new HashSet<>( 
this.getSessionFactory().getCurrentSession() .createQuery( "select ad.id from ArrayDesign as ad join ad.mergees subs where ad.id in (:ids) group by ad" ) - .setParameterList( "ids", distinctIds ).list() ); - return distinctIds.stream().collect( Collectors.toMap( id -> id, mergedIds::contains ) ); + .setParameterList( "ids", optimizeParameterList( ids ) ).list() ); + return ids.stream().distinct().collect( Collectors.toMap( id -> id, mergedIds::contains ) ); } @Override @@ -452,12 +453,11 @@ public Map isMergee( final Collection ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } - Set distinctIds = new HashSet<>( ids ); //noinspection unchecked,rawtypes Set mergeeIds = new HashSet<>( this.getSessionFactory().getCurrentSession() .createQuery( "select ad.id from ArrayDesign as ad where ad.mergedInto.id is not null and ad.id in (:ids)" ) - .setParameterList( "ids", distinctIds ).list() ); - return distinctIds.stream().collect( Collectors.toMap( id -> id, mergeeIds::contains ) ); + .setParameterList( "ids", optimizeParameterList( ids ) ).list() ); + return ids.stream().distinct().collect( Collectors.toMap( id -> id, mergeeIds::contains ) ); } @Override @@ -465,12 +465,11 @@ public Map isSubsumed( final Collection ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } - Set distinctIds = new HashSet<>( ids ); //noinspection unchecked,rawtypes Set subsumedIds = new HashSet<>( this.getSessionFactory().getCurrentSession() .createQuery( "select ad.id from ArrayDesign as ad where ad.subsumingArrayDesign.id is not null and ad.id in (:ids)" ) - .setParameterList( "ids", distinctIds ).list() ); - return distinctIds.stream().collect( Collectors.toMap( id -> id, subsumedIds::contains ) ); + .setParameterList( "ids", optimizeParameterList( ids ) ).list() ); + return ids.stream().distinct().collect( Collectors.toMap( id -> id, subsumedIds::contains ) ); } @Override @@ -478,12 +477,11 @@ public Map isSubsumer( Collection ids ) { if ( ids.isEmpty() ) { return 
Collections.emptyMap(); } - Set distinctIds = new HashSet<>( ids ); //noinspection unchecked,rawtypes Set subsumerIds = new HashSet<>( this.getSessionFactory().getCurrentSession() .createQuery( "select ad.id from ArrayDesign as ad join ad.subsumedArrayDesigns subs where ad.id in (:ids) group by ad" ) - .setParameterList( "ids", distinctIds ).list() ); - return distinctIds.stream().collect( Collectors.toMap( id -> id, subsumerIds::contains ) ); + .setParameterList( "ids", optimizeParameterList( ids ) ).list() ); + return ids.stream().distinct().collect( Collectors.toMap( id -> id, subsumerIds::contains ) ); } @Override @@ -572,7 +570,7 @@ public long numAllCompositeSequenceWithBioSequences( Collection ids ) { "select count (distinct cs) from CompositeSequence as cs inner join cs.arrayDesign as ar " + " where ar.id in (:ids) and cs.biologicalCharacteristic.sequence is not null"; return ( Long ) this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).uniqueResult(); + .setParameterList( "ids", optimizeParameterList( ids ) ).uniqueResult(); } @Override @@ -594,7 +592,7 @@ public long numAllCompositeSequenceWithBlatResults( Collection ids ) { "select count (distinct cs) from CompositeSequence as cs inner join cs.arrayDesign as ar " + ", BlatResult as blat where blat.querySequence != null and ar.id in (:ids)"; return ( Long ) this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).uniqueResult(); + .setParameterList( "ids", optimizeParameterList( ids ) ).uniqueResult(); } @Override @@ -619,7 +617,7 @@ public long numAllCompositeSequenceWithGenes( Collection ids ) { + "where bs2gp.bioSequence=cs.biologicalCharacteristic and " + "bs2gp.geneProduct=gp and ar.id in (:ids)"; return ( Long ) this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).uniqueResult(); + .setParameterList( "ids", optimizeParameterList( ids ) 
).uniqueResult(); } @Override @@ -644,7 +642,7 @@ public long numAllGenes( Collection ids ) { + "where bs2gp.bioSequence=cs.biologicalCharacteristic and " + "bs2gp.geneProduct=gp and ar.id in (:ids)"; return ( Long ) this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).uniqueResult(); + .setParameterList( "ids", optimizeParameterList( ids ) ).uniqueResult(); } @Override @@ -1003,7 +1001,8 @@ private void populateExternalReferences( Collection resu } //noinspection unchecked List r = getSessionFactory().getCurrentSession() - .createQuery( "select ad.id, e from ArrayDesign ad join ad.externalReferences e" ) + .createQuery( "select ad.id, e from ArrayDesign ad join ad.externalReferences e where ad.id in :ids" ) + .setParameterList( "ids", optimizeParameterList( EntityUtils.getIds( results ) ) ) .setCacheable( true ) .list(); Map> dbi = r.stream() @@ -1036,22 +1035,23 @@ private void populateBlacklisted( Collection vos ) { private void populateExpressionExperimentCount( Collection entities ) { Query query = this.getSessionFactory().getCurrentSession() - // using EXPRESSION_EXPERIMENT_FK, we don't need to do a jointure on the INVESTIGATION table, however - // the count reflect the number of bioassays, not EEs - .createSQLQuery( "select BA.ARRAY_DESIGN_USED_FK as ID, count(distinct BA.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from BIO_ASSAY BA " - + AclQueryUtils.formNativeAclJoinClause( "BA.EXPRESSION_EXPERIMENT_FK" ) - + AclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory() ) + " " - // FIXME: exclude troubled datasets - + "group by BA.ARRAY_DESIGN_USED_FK" - ) + .createSQLQuery( "select ee2ad.ARRAY_DESIGN_FK as ID, count(distinct ee2ad.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + + EE2CAclQueryUtils.formNativeAclJoinClause( "ee2ad.EXPRESSION_EXPERIMENT_FK" ) + " " + + "where ee2ad.ARRAY_DESIGN_FK in :ids " + + "and not 
ee2ad.IS_ORIGINAL_PLATFORM" + + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + + formNativeNonTroubledClause( "ee2ad.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ) + + " group by ee2ad.ARRAY_DESIGN_FK" ) .addScalar( "ID", StandardBasicTypes.LONG ) - .addScalar( "EE_COUNT", StandardBasicTypes.LONG ); - AclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); - //noinspection unchecked - List list = query - .setCacheable( true ) - .list(); - Map countById = list.stream() + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + // ensures that the cache is invalidated when the ee2ad table is regenerated + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + // ensures that the cache is invalidated when EEs or ADs are added/removed + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .setCacheable( true ); + EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); + Map countById = QueryUtils.streamByBatch( query, "ids", EntityUtils.getIds( entities ), 2048, Object[].class ) .collect( Collectors.toMap( o -> ( Long ) o[0], o -> ( Long ) o[1] ) ); for ( ArrayDesignValueObject vo : entities ) { // missing implies no EEs, so zero is a valid default @@ -1061,19 +1061,25 @@ private void populateExpressionExperimentCount( Collection entities ) { Query query = this.getSessionFactory().getCurrentSession() - // using EXPRESSION_EXPERIMENT_FK, we don't need to do a jointure on the INVESTIGATION table, however - // the count reflect the number of bioassays, not EEs - .createSQLQuery( "select BA.ORIGINAL_PLATFORM_FK as ID, count(distinct BA.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from BIO_ASSAY BA " - + AclQueryUtils.formNativeAclJoinClause( "BA.EXPRESSION_EXPERIMENT_FK" ) - + AclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory() ) + " " - // FIXME: 
exclude troubled datasets - + "group by BA.ORIGINAL_PLATFORM_FK" ) + .createSQLQuery( "select ee2ad.ARRAY_DESIGN_FK as ID, count(distinct ee2ad.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + + EE2CAclQueryUtils.formNativeAclJoinClause( "ee2ad.EXPRESSION_EXPERIMENT_FK" ) + " " + + "where ee2ad.ARRAY_DESIGN_FK in :ids " + + "and ee2ad.IS_ORIGINAL_PLATFORM " + // ignore noop switches + + "and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM)" + + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + + formNativeNonTroubledClause( "ee2ad.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ) + + " group by ee2ad.ARRAY_DESIGN_FK" ) .addScalar( "ID", StandardBasicTypes.LONG ) - .addScalar( "EE_COUNT", StandardBasicTypes.LONG ); - AclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); - //noinspection unchecked - List results = query.setCacheable( true ).list(); - Map switchedCountById = results.stream() + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + // ensures that the cache is invalidated when the ee2ad table is regenerated + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + // ensures that the cache is invalidated when EEs or ADs are added/removed + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .setCacheable( true ); + EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); + Map switchedCountById = QueryUtils.streamByBatch( query, "ids", EntityUtils.getIds( entities ), 2048, Object[].class ) .collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); for ( ArrayDesignValueObject vo : entities ) { // missing implies no switched EEs, 
so zero is a valid default diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/AbstractDesignElementDataVectorDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/AbstractDesignElementDataVectorDao.java index 51d61c3a92..9f56829005 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/AbstractDesignElementDataVectorDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/AbstractDesignElementDataVectorDao.java @@ -26,7 +26,6 @@ import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; -import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.AbstractDao; @@ -34,6 +33,8 @@ import java.util.HashSet; import java.util.Set; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** * @author pavlidis * @see ubic.gemma.model.expression.bioAssayData.DesignElementDataVector @@ -84,7 +85,7 @@ public void thaw( Collection designElementDataVectors ) { eeTimer.start(); this.getSessionFactory().getCurrentSession() .createQuery( "select ee from ExpressionExperiment ee where ee in :ees" ) - .setParameterList( "ees", ees ) + .setParameterList( "ees", optimizeIdentifiableParameterList( ees ) ) .list(); eeTimer.stop(); } @@ -101,7 +102,7 @@ public void thaw( Collection designElementDataVectors ) { + "left join fetch fv.experimentalFactor " + "fetch all properties " + "where bad in :dims" ) - .setParameterList( "dims", dims ) + .setParameterList( "dims", optimizeIdentifiableParameterList( dims ) ) .list(); dimTimer.stop(); } diff --git 
a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java index 31eebd0d09..0890641df9 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java @@ -27,7 +27,6 @@ import org.springframework.stereotype.Repository; import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix; import ubic.basecode.dataStructure.matrix.DoubleMatrix; -import ubic.basecode.util.BatchIterator; import ubic.gemma.core.analysis.preprocess.normalize.QuantileNormalizer; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrixUtil; @@ -55,6 +54,8 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.*; + /** * @author Paul */ @@ -302,9 +303,7 @@ public Map>> getRanks( } Map>> result = new HashMap<>(); - BatchIterator batchIterator = new BatchIterator<>( cs2gene.keySet(), 500 ); - - for ( Collection batch : batchIterator ) { + for ( Collection batch : batchIdentifiableParameterList( cs2gene.keySet(), 512 ) ) { //language=HQL //noinspection unchecked @@ -313,7 +312,7 @@ public Map>> getRanks( + "where dedv.designElement in ( :cs ) and dedv.expressionExperiment in (:ees) " + "group by dedv.designElement, dedv.expressionExperiment" ) .setParameter( "cs", batch ) - .setParameterList( "ees", expressionExperiments ) + .setParameterList( "ees", optimizeIdentifiableParameterList( expressionExperiments ) ) .list(); for ( Object[] o : qr ) { @@ -352,7 +351,7 @@ public Map> getRanks( ExpressionExperiment expressionEx "select dedv.designElement, dedv.rankByMean, dedv.rankByMax 
from ProcessedExpressionDataVector dedv " + "where dedv.designElement in (:cs) and dedv.expressionExperiment = :ee " + "group by dedv.designElement, dedv.expressionExperiment" ) - .setParameterList( "cs", cs2gene.keySet() ) + .setParameterList( "cs", optimizeIdentifiableParameterList( cs2gene.keySet() ) ) .setParameter( "ee", expressionExperiment ) .list(); @@ -416,7 +415,7 @@ public Map>> ge + "from ProcessedExpressionDataVector dedv " + "where dedv.designElement in (:cs) and dedv.expressionExperiment in (:ees) " + "group by dedv.designElement, dedv.expressionExperiment" ) - .setParameterList( "cs", cs2gene.keySet() ) + .setParameterList( "cs", optimizeIdentifiableParameterList( cs2gene.keySet() ) ) .setParameterList( "ees", expressionExperiments ) .list(); @@ -493,8 +492,8 @@ public void removeProcessedDataVectors( ExpressionExperiment expressionExperimen qtsToRemove.forEach( expressionExperiment.getQuantitationTypes()::remove ); this.getSessionFactory().getCurrentSession().update( expressionExperiment ); this.getSessionFactory().getCurrentSession() - .createQuery( "delete from QuantitationType where id in (:ids)" ) - .setParameterList( "ids", EntityUtils.getIds( qtsToRemove ) ); + .createQuery( "delete from QuantitationType qt where qt in (:qts)" ) + .setParameterList( "qts", optimizeIdentifiableParameterList( qtsToRemove ) ); } } @@ -726,7 +725,7 @@ private Map> getBioAssayDimensions( + "inner join bad.bioAssays badba " + "where e in (:ees) and b in (badba) " + "group by e, bad" ) - .setParameterList( "ees", ees ) + .setParameterList( "ees", optimizeIdentifiableParameterList( ees ) ) .list(); for ( Object[] o : r ) { @@ -1002,9 +1001,9 @@ private Map> getProcessedVectors "select dedv, dedv.designElement.id from ProcessedExpressionDataVector dedv fetch all properties" + " where dedv.designElement.id in ( :cs ) " + ( ees != null ? 
" and dedv.expressionExperiment in :ees" : "" ) ) - .setParameterList( "cs", cs2gene.keySet() ); + .setParameterList( "cs", optimizeParameterList( cs2gene.keySet() ) ); if ( ees != null ) { - queryObject.setParameterList( "ees", ees ); + queryObject.setParameterList( "ees", optimizeIdentifiableParameterList( ees ) ); } Map> dedv2genes = new HashMap<>(); //noinspection unchecked @@ -1034,8 +1033,8 @@ private Map> getProcessedVectors } /** - * @param limit if non-null and positive, you will get a random set of vectors for the experiment * @param ee ee + * @param limit if >0, you will get a "random" set of vectors for the experiment * @return processed data vectors */ private Collection getProcessedVectors( ExpressionExperiment ee, int limit ) { @@ -1046,7 +1045,7 @@ private Collection getProcessedVectors( Expressio StopWatch timer = new StopWatch(); timer.start(); - List result; + Collection result = new HashSet<>(); Integer availableVectorCount = ee.getNumberOfDataVectors(); if ( availableVectorCount == null || availableVectorCount == 0 ) { @@ -1054,26 +1053,39 @@ private Collection getProcessedVectors( Expressio // cannot fix this here, because we're read-only. } + /* + * To help ensure we get a good random set of items, we can do several queries with different random offsets. + */ + // int numSegments = 2; + // int segmentSize = ( int ) Math.ceil( limit / numSegments ); + int segmentSize = limit; +// if ( limit < numSegments ) { +// segmentSize = limit; +// } + Query q = this.getSessionFactory().getCurrentSession() .createQuery( " from ProcessedExpressionDataVector dedv " - + "where dedv.expressionExperiment = :ee" ); + + "where dedv.expressionExperiment = :ee and dedv.rankByMean > 0.5 order by RAND()" ); // order by rand() works? 
q.setParameter( "ee", ee ); - q.setMaxResults( limit ); - if ( availableVectorCount != null && availableVectorCount > limit ) { - q.setFirstResult( new Random().nextInt( availableVectorCount - limit ) ); - } + q.setMaxResults( segmentSize ); - // we should already be read-only, so this is probably pointless. - q.setReadOnly( true ); + int k = 0; + while ( result.size() < limit ) { + // int firstResult = new Random().nextInt( availableVectorCount - segmentSize ); + // q.setFirstResult( firstResult ); + List list = q.list(); + // log.info( list.size() + " retrieved this time firstResult=" + 0 ); + result.addAll( list ); + k++; + } - // and so this probably doesn't do anything useful. - q.setFlushMode( FlushMode.MANUAL ); + if ( result.size() > limit ) { + result = result.stream().limit( limit ).collect( Collectors.toSet() ); + } - //noinspection unchecked - result = q.list(); if ( timer.getTime() > 1000 ) AbstractDao.log - .info( "Fetch " + limit + " vectors from " + ee.getShortName() + ": " + timer.getTime() + "ms" ); + .info( "Fetch " + result.size() + " vectors from " + ee.getShortName() + ": " + timer.getTime() + "ms, " + k + " queries were run." 
); if ( result.isEmpty() ) { AbstractDao.log.warn( "Experiment does not have any processed data vectors" ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/RawExpressionDataVectorDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/RawExpressionDataVectorDaoImpl.java index a981915865..4757ec8eb0 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/RawExpressionDataVectorDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/RawExpressionDataVectorDaoImpl.java @@ -24,6 +24,8 @@ import java.util.Collection; import java.util.HashSet; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** * @author paul */ @@ -58,10 +60,9 @@ public Collection find( Collection d //noinspection unchecked return this.getSessionFactory().getCurrentSession().createQuery( "select dev from RawExpressionDataVector as dev " - + "join dev.designElement as de " // no need for the fetch jointures since the design elements and biological characteristics are already in the session - + "where de in (:des) and dev.quantitationType = :qt" ) - .setParameterList( "des", designElements ) + + "where dev.designElement in (:des) and dev.quantitationType = :qt" ) + .setParameterList( "des", optimizeIdentifiableParameterList( designElements ) ) .setParameter( "qt", quantitationType ) .list(); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/designElement/CompositeSequenceDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/designElement/CompositeSequenceDaoImpl.java index 6e57a336e9..5ec7076ecb 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/designElement/CompositeSequenceDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/designElement/CompositeSequenceDaoImpl.java @@ -21,12 
+21,14 @@ import gemma.gsec.util.SecurityUtil; import org.apache.commons.lang3.time.StopWatch; -import org.hibernate.*; +import org.hibernate.Criteria; +import org.hibernate.Hibernate; +import org.hibernate.Query; +import org.hibernate.SessionFactory; import org.hibernate.criterion.Restrictions; import org.hibernate.type.StandardBasicTypes; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import ubic.basecode.util.BatchIterator; import ubic.gemma.model.association.BioSequence2GeneProduct; import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; @@ -45,7 +47,10 @@ import javax.annotation.Nullable; import java.util.*; +import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_BATCH_SIZE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.batchIdentifiableParameterList; +import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; /** * @author pavlidis @@ -54,7 +59,8 @@ public class CompositeSequenceDaoImpl extends AbstractQueryFilteringVoEnabledDao implements CompositeSequenceDao { - private static final int PROBE_TO_GENE_MAP_BATCH_SIZE = 2000; + private static final int PROBE_TO_GENE_MAP_BATCH_SIZE = 2048; + /** * Absolute maximum number of records to return when fetching raw summaries. This is necessary to avoid retrieving * millions of records (some sequences are repeats and can have >200,000 records. 
@@ -236,42 +242,27 @@ public CompositeSequence findByName( ArrayDesign arrayDesign, final String name public Map> getGenes( Collection compositeSequences ) { Map> returnVal = new HashMap<>(); - int BATCH_SIZE = 2000; - - if ( compositeSequences.size() == 0 ) + if ( compositeSequences.isEmpty() ) return returnVal; - /* - * Get the cs->gene mapping - */ - final String nativeQuery = "SELECT CS, GENE FROM GENE2CS WHERE CS IN (:csids) "; - for ( CompositeSequence cs : compositeSequences ) { - returnVal.put( cs, new HashSet() ); + returnVal.put( cs, new HashSet<>() ); } + /* + * Get the cs->gene mapping + */ List csGene = new ArrayList<>(); - Session session = this.getSessionFactory().getCurrentSession(); - org.hibernate.SQLQuery queryObject = session.createSQLQuery( nativeQuery ); - queryObject.addScalar( "cs", StandardBasicTypes.LONG ); - queryObject.addScalar( "gene", StandardBasicTypes.LONG ); - queryObject.addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ); - queryObject.addSynchronizedEntityClass( ArrayDesign.class ); - queryObject.addSynchronizedEntityClass( CompositeSequence.class ); - queryObject.addSynchronizedEntityClass( Gene.class ); - - Collection csIdBatch = new HashSet<>(); - for ( CompositeSequence cs : compositeSequences ) { - csIdBatch.add( cs.getId() ); - - if ( csIdBatch.size() == BATCH_SIZE ) { - queryObject.setParameterList( "csids", csIdBatch ); - csGene.addAll( queryObject.list() ); - csIdBatch.clear(); - } - } - - if ( csIdBatch.size() > 0 ) { + Query queryObject = this.getSessionFactory().getCurrentSession() + .createSQLQuery( "SELECT CS, GENE FROM GENE2CS WHERE CS IN (:csids)" ) + .addScalar( "cs", StandardBasicTypes.LONG ) + .addScalar( "gene", StandardBasicTypes.LONG ) + .addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .addSynchronizedEntityClass( CompositeSequence.class ) + .addSynchronizedEntityClass( Gene.class ); + + for ( Collection csIdBatch : batchParameterList( EntityUtils.getIds( 
compositeSequences ), GENE2CS_BATCH_SIZE ) ) { queryObject.setParameterList( "csids", csIdBatch ); csGene.addAll( queryObject.list() ); } @@ -305,25 +296,12 @@ public Map> getGenes( Collection batch = new HashSet<>(); Collection genes = new HashSet<>(); String geneQuery = "from Gene g where g.id in ( :gs )"; - - org.hibernate.Query geneQueryObject = this.getSessionFactory().getCurrentSession().createQuery( geneQuery ) - .setFetchSize( 1000 ); - - for ( Long gene : genesToFetch ) { - batch.add( gene ); - if ( batch.size() == BATCH_SIZE ) { - AbstractDao.log.debug( "Processing batch ... " ); - geneQueryObject.setParameterList( "gs", batch ); - //noinspection unchecked - genes.addAll( geneQueryObject.list() ); - batch.clear(); - } - } - - if ( batch.size() > 0 ) { + org.hibernate.Query geneQueryObject = this.getSessionFactory().getCurrentSession() + .createQuery( geneQuery ); + for ( Collection batch : batchParameterList( genesToFetch, GENE2CS_BATCH_SIZE ) ) { + AbstractDao.log.debug( "Processing batch ... 
" ); geneQueryObject.setParameterList( "gs", batch ); //noinspection unchecked genes.addAll( geneQueryObject.list() ); @@ -397,14 +375,10 @@ public Map> getGenesWithS + " composite sequences" ); Map> results = new HashMap<>(); - BatchIterator it = BatchIterator - .batches( compositeSequences, CompositeSequenceDaoImpl.PROBE_TO_GENE_MAP_BATCH_SIZE ); - StopWatch timer = new StopWatch(); timer.start(); int total = 0; - for ( ; it.hasNext(); ) { - Collection batch = it.next(); + for ( Collection batch : batchIdentifiableParameterList( compositeSequences, CompositeSequenceDaoImpl.PROBE_TO_GENE_MAP_BATCH_SIZE ) ) { this.batchGetGenesWithSpecificity( batch, results ); total += batch.size(); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/BlacklistedEntityDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/BlacklistedEntityDaoImpl.java index c50b11c44d..ec05035ee3 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/BlacklistedEntityDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/BlacklistedEntityDaoImpl.java @@ -39,6 +39,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * * @author paul @@ -142,7 +144,7 @@ public int removeAll() { if ( !deIds.isEmpty() ) { removedDe = getSessionFactory().getCurrentSession() .createQuery( "delete from DatabaseEntry where id in :deIds" ) - .setParameterList( "deIds", deIds ) + .setParameterList( "deIds", optimizeParameterList( deIds ) ) .executeUpdate(); } else { removedDe = 0; diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java index f3d637b7c4..8ae858cd49 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java @@ -170,7 +170,7 @@ public interface ExpressionExperimentDao *

* Experiments are not filtered by ACLs and toubled experiments are only visible to administrators. */ - Map getPerTaxonCount( List ids ); + Map getPerTaxonCount( Collection ids ); Map getPopulatedFactorCounts( Collection ids ); @@ -271,8 +271,6 @@ Map> getSampleRemovalEvents( */ List getExperimentalDesignAnnotations( ExpressionExperiment expressionExperiment ); - Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ); - /** * Special indicator for free-text terms. *

@@ -287,6 +285,8 @@ Map> getSampleRemovalEvents( */ String UNCATEGORIZED = "[uncategorized_" + RandomStringUtils.randomAlphanumeric( 10 ) + "]"; + Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ); + /** * Obtain annotations usage frequency for a set of given {@link ExpressionExperiment} IDs. *

diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index a7f95558a7..d77eae323c 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -62,11 +62,14 @@ import javax.annotation.Nullable; import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.summingLong; +import static ubic.gemma.model.common.description.CharacteristicUtils.*; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2AD_QUERY_SPACE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2C_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.*; /** * @author pavlidis @@ -77,8 +80,6 @@ public class ExpressionExperimentDaoImpl extends AbstractCuratableDao implements ExpressionExperimentDao { - private static final int BATCH_SIZE = 1000; - private static final String CHARACTERISTIC_ALIAS = CharacteristicDao.OBJECT_ALIAS, BIO_MATERIAL_CHARACTERISTIC_ALIAS = "bmc", @@ -133,7 +134,7 @@ public Collection filterByTaxon( @Nullable Collection ids, Taxon tax //noinspection unchecked return this.getSessionFactory().getCurrentSession().createQuery( queryString ).setParameter( "taxon", taxon ) - .setParameterList( "ids", ids ).list(); + .setParameterList( "ids", optimizeParameterList( ids ) ).list(); } @Override @@ -247,44 +248,21 @@ public ExpressionExperiment findByBioMaterial( BioMaterial bm ) { @Override public Map findByBioMaterials( Collection bms ) { - if ( bms.size() == 0 ) { + if ( bms.isEmpty() ) { return new HashMap<>(); } - //language=HQL - final String queryString = 
"select ee, sample from ExpressionExperiment as ee " - + "inner join ee.bioAssays as ba inner join ba.sampleUsed as sample where sample in (:bms) group by ee, sample"; - + //noinspection unchecked + List r = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee, sample from ExpressionExperiment as ee " + + "inner join ee.bioAssays as ba inner join ba.sampleUsed as sample where sample in (:bms) group by ee, sample" ) + .setParameterList( "bms", optimizeIdentifiableParameterList( bms ) ) + .list(); Map results = new HashMap<>(); - Collection batch = new HashSet<>(); - - for ( BioMaterial o : bms ) { - batch.add( o ); - if ( batch.size() == ExpressionExperimentDaoImpl.BATCH_SIZE ) { - - //noinspection unchecked - List r = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "bms", batch ).list(); - for ( Object a : r ) { - ExpressionExperiment e = ( ExpressionExperiment ) ( ( Object[] ) a )[0]; - BioMaterial b = ( BioMaterial ) ( ( Object[] ) a )[1]; // representative, there may have been multiple used as inputs - results.put( e, b ); - } - batch.clear(); - } - } - - if ( batch.size() > 0 ) { - - //noinspection unchecked - List r = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "bms", batch ).list(); - for ( Object a : r ) { - ExpressionExperiment e = ( ExpressionExperiment ) ( ( Object[] ) a )[0]; - BioMaterial b = ( BioMaterial ) ( ( Object[] ) a )[1]; // representative, there may have been multiple used as inputs - results.put( e, b ); - } + for ( Object[] a : r ) { + ExpressionExperiment e = ( ExpressionExperiment ) a[0]; + BioMaterial b = ( BioMaterial ) a[1]; // representative, there may have been multiple used as inputs + results.put( e, b ); } - return results; } @@ -321,19 +299,13 @@ public ExpressionExperiment findByDesign( ExperimentalDesign ed ) { @Override public ExpressionExperiment findByFactor( ExperimentalFactor ef ) { - //language=HQL - final 
String queryString = - "select ee from ExpressionExperiment as ee inner join ee.experimentalDesign ed " - + "inner join ed.experimentalFactors ef where ef = :ef group by ee"; - - List results = this.getSessionFactory().getCurrentSession().createQuery( queryString ).setParameter( "ef", ef ) - .list(); - - if ( results.size() == 0 ) { - AbstractDao.log.info( "There is no expression experiment that has factor = " + ef ); - return null; - } - return ( ExpressionExperiment ) results.iterator().next(); + return ( ExpressionExperiment ) this.getSessionFactory().getCurrentSession() + .createQuery( "select distinct ee from ExpressionExperiment as ee " + + "join ee.experimentalDesign ed " + + "join ed.experimentalFactors ef " + + "where ef = :ef" ) + .setParameter( "ef", ef ) + .uniqueResult(); } @Override @@ -343,64 +315,32 @@ public ExpressionExperiment findByFactorValue( FactorValue fv ) { @Override public ExpressionExperiment findByFactorValue( Long factorValueId ) { - //language=HQL - final String queryString = - "select ee from ExpressionExperiment as ee inner join ee.experimentalDesign ed " - + "inner join ed.experimentalFactors ef inner join ef.factorValues fv where fv.id = :fvId group by ee"; - - //noinspection unchecked - List results = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameter( "fvId", factorValueId ).list(); - - if ( results.size() == 0 ) { - return null; - } - - return results.get( 0 ); + return ( ExpressionExperiment ) this.getSessionFactory().getCurrentSession() + .createQuery( "select distinct ee from ExpressionExperiment as ee " + + "join ee.experimentalDesign ed " + + "join ed.experimentalFactors ef " + + "join ef.factorValues fv " + + "where fv.id = :fvId" ) + .setParameter( "fvId", factorValueId ) + .uniqueResult(); } @Override public Map findByFactorValues( Collection fvs ) { - if ( fvs.isEmpty() ) return new HashMap<>(); - - //language=HQL - final String queryString = "select ee, f from ExpressionExperiment 
ee " - + " join ee.experimentalDesign ed join ed.experimentalFactors ef join ef.factorValues f" - + " where f in (:fvs) group by ee, f"; Map results = new HashMap<>(); - Collection batch = new HashSet<>(); - for ( FactorValue o : fvs ) { - batch.add( o ); - if ( batch.size() == ExpressionExperimentDaoImpl.BATCH_SIZE ) { - - //noinspection unchecked - List r2 = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "fvs", batch ).list(); - for ( Object o1 : r2 ) { - Object[] a = ( Object[] ) o1; - results.put( ( ExpressionExperiment ) a[0], ( FactorValue ) a[1] ); - } - - batch.clear(); - } - } - - if ( batch.size() > 0 ) { - - //noinspection unchecked - List r2 = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "fvs", batch ).list(); - for ( Object o1 : r2 ) { - Object[] a = ( Object[] ) o1; - results.put( ( ExpressionExperiment ) a[0], ( FactorValue ) a[1] ); - } - + //noinspection unchecked + List r2 = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee, f from ExpressionExperiment ee " + + "join ee.experimentalDesign ed join ed.experimentalFactors ef join ef.factorValues f " + + "where f in (:fvs) group by ee, f" ) + .setParameterList( "fvs", optimizeIdentifiableParameterList( fvs ) ) + .list(); + for ( Object[] row : r2 ) { + results.put( ( ExpressionExperiment ) row[0], ( FactorValue ) row[1] ); } - return results; - } @Override @@ -470,7 +410,7 @@ public List findByUpdatedLimit( Collection ids, int String queryString = "select e from ExpressionExperiment e join e.curationDetails s where e.id in (:ids) order by s.lastUpdated desc "; Query q = s.createQuery( queryString ); - q.setParameterList( "ids", ids ); + q.setParameterList( "ids", optimizeParameterList( ids ) ); q.setMaxResults( limit ); //noinspection unchecked @@ -513,9 +453,14 @@ public Map getAnnotationCounts( Collection ids ) { if ( ids.size() == 0 ) { return results; } - String queryString = "select 
e.id,count(c.id) from ExpressionExperiment e inner join e.characteristics c where e.id in (:ids) group by e.id"; - List res = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).list(); + //noinspection unchecked + List res = this.getSessionFactory().getCurrentSession() + .createQuery( "select e.id, count(c.id) from ExpressionExperiment e " + + "join e.characteristics c " + + "where e.id in (:ids) " + + "group by e" ) + .setParameterList( "ids", optimizeParameterList( ids ) ) + .list(); for ( Object[] ro : res ) { Long id = ( Long ) ro[0]; @@ -543,11 +488,11 @@ public Collection getAnnotationsByBioMaterials( for ( Characteristic c : raw ) { // filter. Could include this in the query if it isn't too complicated. - if ( c.getCategoryUri() == null ) { + if ( isUncategorized( c ) || isFreeTextCategory( c ) ) { continue; } - if ( c.getValueUri() == null ) { + if ( isFreeText( c ) ) { continue; } @@ -676,7 +621,8 @@ private List getAnnotationsByLevel( ExpressionExperiment express } @Override - public Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + public Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ) { + boolean doAclFiltering = eeIds == null; if ( eeIds != null && eeIds.isEmpty() ) { return Collections.emptyMap(); } @@ -704,45 +650,71 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti excludedTermUris = excludedTermUris.stream().filter( Objects::nonNull ).collect( Collectors.toList() ); } } - String query = "select T.CATEGORY as CATEGORY, T.CATEGORY_URI as CATEGORY_URI, count(distinct T.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2CHARACTERISTIC T " - + EE2CAclQueryUtils.formNativeAclJoinClause( 
"T.EXPRESSION_EXPERIMENT_FK" ) + " " - + "where T.EXPRESSION_EXPERIMENT_FK is not null "; + String query = "select T.CATEGORY as CATEGORY, T.CATEGORY_URI as CATEGORY_URI, count(distinct T.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2CHARACTERISTIC T "; + if ( doAclFiltering ) { + query += EE2CAclQueryUtils.formNativeAclJoinClause( "T.EXPRESSION_EXPERIMENT_FK" ) + " "; + } if ( eeIds != null ) { - query += " and T.EXPRESSION_EXPERIMENT_FK in :eeIds"; + query += "where T.EXPRESSION_EXPERIMENT_FK in :eeIds"; + } else { + query += "where T.EXPRESSION_EXPERIMENT_FK is not null"; } query += getExcludeUrisClause( excludedCategoryUris, excludedTermUris, excludeFreeTextCategories, excludeFreeTextTerms, excludeUncategorized, retainedTermUris ); - query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " - + "group by COALESCE(T.CATEGORY_URI, T.CATEGORY) " - + "order by EE_COUNT desc"; + if ( doAclFiltering ) { + query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ); + // troubled filtering + query += formNativeNonTroubledClause( "T.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ); + } + query += " group by COALESCE(T.CATEGORY_URI, T.CATEGORY)"; + if ( maxResults > 0 ) { + query += " order by EE_COUNT desc"; + } Query q = getSessionFactory().getCurrentSession().createSQLQuery( query ) .addScalar( "CATEGORY", StandardBasicTypes.STRING ) .addScalar( "CATEGORY_URI", StandardBasicTypes.STRING ) .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) .addSynchronizedQuerySpace( EE2C_QUERY_SPACE ) .addSynchronizedEntityClass( ExpressionExperiment.class ) - .addSynchronizedEntityClass( Characteristic.class ) - .setCacheable( true ); - if ( eeIds != null ) { - q.setParameterList( "eeIds", new HashSet<>( eeIds ) ); - } + .addSynchronizedEntityClass( 
Characteristic.class ); if ( excludedCategoryUris != null && !excludedCategoryUris.isEmpty() ) { - q.setParameterList( "excludedCategoryUris", excludedCategoryUris ); + q.setParameterList( "excludedCategoryUris", optimizeParameterList( excludedCategoryUris ) ); } if ( excludedTermUris != null && !excludedTermUris.isEmpty() ) { - q.setParameterList( "excludedTermUris", excludedTermUris ); + q.setParameterList( "excludedTermUris", optimizeParameterList( excludedTermUris ) ); } if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { - q.setParameterList( "retainedTermUris", retainedTermUris ); + q.setParameterList( "retainedTermUris", optimizeParameterList( retainedTermUris ) ); } - EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); - //noinspection unchecked - List result = q.list(); - TreeMap byC = new TreeMap<>( Characteristic.getByCategoryComparator() ); - for ( Object[] row : result ) { - Characteristic c = Characteristic.Factory.newInstance( null, null, null, null, ( String ) row[0], ( String ) row[1], null ); - byC.put( c, ( Long ) row[2] ); + if ( doAclFiltering ) { + EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); } - return byC; + q.setCacheable( true ); + List result; + if ( eeIds != null ) { + if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { + result = listByBatch( q, "eeIds", eeIds, 2048 ); + if ( maxResults > 0 ) { + return aggregateByCategory( result ).entrySet().stream() + .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) + .limit( maxResults ) + .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue ) ); + } + } else { + //noinspection unchecked + result = q + .setParameterList( "eeIds", optimizeParameterList( eeIds ) ) + .setMaxResults( maxResults ) + .list(); + } + } else { + //noinspection unchecked + result = q.setMaxResults( maxResults ).list(); + } + return aggregateByCategory( result ); + } + + private Map aggregateByCategory( List result ) { + return 
result.stream().collect( Collectors.groupingBy( row -> Characteristic.Factory.newInstance( null, null, null, null, ( String ) row[0], ( String ) row[1], null ), Collectors.summingLong( row -> ( Long ) row[2] ) ) ); } /** @@ -751,6 +723,7 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti */ @Override public Map getAnnotationsUsageFrequency( @Nullable Collection eeIds, @Nullable Class level, int maxResults, int minFrequency, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + boolean doAclFiltering = eeIds == null; if ( eeIds != null && eeIds.isEmpty() ) { return Collections.emptyMap(); } @@ -778,11 +751,14 @@ public Map getAnnotationsUsageFrequency( @Nullable Collect excludedTermUris = excludedTermUris.stream().filter( Objects::nonNull ).collect( Collectors.toList() ); } } - String query = "select T.`VALUE` as `VALUE`, T.VALUE_URI as VALUE_URI, T.CATEGORY as CATEGORY, T.CATEGORY_URI as CATEGORY_URI, T.EVIDENCE_CODE as EVIDENCE_CODE, count(distinct T.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2CHARACTERISTIC T " - + EE2CAclQueryUtils.formNativeAclJoinClause( "T.EXPRESSION_EXPERIMENT_FK" ) + " " - + "where T.EXPRESSION_EXPERIMENT_FK is not null"; // this is necessary for the clause building since there might be no clause + String query = "select T.`VALUE` as `VALUE`, T.VALUE_URI as VALUE_URI, T.CATEGORY as CATEGORY, T.CATEGORY_URI as CATEGORY_URI, T.EVIDENCE_CODE as EVIDENCE_CODE, count(distinct T.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2CHARACTERISTIC T "; + if ( doAclFiltering ) { + query += EE2CAclQueryUtils.formNativeAclJoinClause( "T.EXPRESSION_EXPERIMENT_FK" ) + " "; + } if ( eeIds != null ) { - query += " and T.EXPRESSION_EXPERIMENT_FK in :eeIds"; + query += "where T.EXPRESSION_EXPERIMENT_FK in :eeIds"; + } else { + query += "where T.EXPRESSION_EXPERIMENT_FK is not null"; // this is necessary for the 
clause building since there might be no clause } if ( level != null ) { query += " and T.LEVEL = :level"; @@ -804,16 +780,25 @@ else if ( category.startsWith( "http://" ) ) { // all categories are requested, we may filter out excluded ones query += getExcludeUrisClause( excludedCategoryUris, excludedTermUris, excludeFreeTextCategories, excludeFreeTextTerms, excludeUncategorized, retainedTermUris ); } - query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " - + "group by " + if ( doAclFiltering ) { + query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ); + query += formNativeNonTroubledClause( "T.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ); + } + //language=HQL + query += " group by " // no need to group by category if a specific one is requested + ( category == null ? "COALESCE(T.CATEGORY_URI, T.CATEGORY), " : "" ) - + "COALESCE(T.VALUE_URI, T.`VALUE`) " - + ( minFrequency > 0 ? 
"having EE_COUNT >= :minFrequency " : "" ); - if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { - query += " or VALUE_URI in (:retainedTermUris)"; + + "COALESCE(T.VALUE_URI, T.`VALUE`)"; + // if there are too many EE IDs, they will be retrieved by batch and filtered in-memory + if ( minFrequency > 1 && ( eeIds == null || eeIds.size() <= MAX_PARAMETER_LIST_SIZE ) ) { + query += " having EE_COUNT >= :minFrequency"; + if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { + query += " or VALUE_URI in (:retainedTermUris)"; + } + } + if ( maxResults > 0 ) { + query += " order by EE_COUNT desc"; } - query += "order by EE_COUNT desc"; Query q = getSessionFactory().getCurrentSession().createSQLQuery( query ) .addScalar( "VALUE", StandardBasicTypes.STRING ) .addScalar( "VALUE_URI", StandardBasicTypes.STRING ) @@ -824,38 +809,55 @@ else if ( category.startsWith( "http://" ) ) { .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) .addSynchronizedQuerySpace( EE2C_QUERY_SPACE ) .addSynchronizedEntityClass( ExpressionExperiment.class ) - .addSynchronizedEntityClass( Characteristic.class ) // ensures that the cache is invalidated if characteristics are added or removed - .setCacheable( true ) - .setMaxResults( maxResults ); - if ( eeIds != null ) { - q.setParameterList( "eeIds", new HashSet<>( eeIds ) ); - } + .addSynchronizedEntityClass( Characteristic.class ); // ensures that the cache is invalidated if characteristics are added or removed if ( category != null && !category.equals( UNCATEGORIZED ) ) { q.setParameter( "category", category ); } if ( excludedCategoryUris != null && !excludedCategoryUris.isEmpty() ) { - q.setParameterList( "excludedCategoryUris", excludedCategoryUris ); + q.setParameterList( "excludedCategoryUris", optimizeParameterList( excludedCategoryUris ) ); } if ( excludedTermUris != null && !excludedTermUris.isEmpty() ) { - q.setParameterList( "excludedTermUris", excludedTermUris ); + q.setParameterList( "excludedTermUris", 
optimizeParameterList( excludedTermUris ) ); } if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { - q.setParameterList( "retainedTermUris", retainedTermUris ); + q.setParameterList( "retainedTermUris", optimizeParameterList( retainedTermUris ) ); } if ( level != null ) { q.setParameter( "level", level ); } - if ( minFrequency > 0 ) { + if ( minFrequency > 1 && ( eeIds == null || eeIds.size() <= MAX_PARAMETER_LIST_SIZE ) ) { q.setParameter( "minFrequency", minFrequency ); } - EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); - //noinspection unchecked - List result = q.list(); - TreeMap byC = new TreeMap<>( Characteristic.getByCategoryAndValueComparator() ); - for ( Object[] row : result ) { - byC.put( convertRowToCharacteristic( row ), ( Long ) row[5] ); + if ( doAclFiltering ) { + EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); + } + q.setCacheable( true ); + List result; + if ( eeIds != null ) { + if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { + result = listByBatch( q, "eeIds", eeIds, 2048 ); + if ( minFrequency > 1 || maxResults > 0 ) { + return aggregateByCategoryAndValue( result ).entrySet().stream() + .filter( e -> e.getValue() >= minFrequency || ( retainedTermUris != null && retainedTermUris.contains( e.getKey().getValueUri() ) ) ) + .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) + .limit( maxResults > 0 ? 
maxResults : Long.MAX_VALUE ) + .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue ) ); + } + } else { + //noinspection unchecked + result = q.setParameterList( "eeIds", optimizeParameterList( eeIds ) ) + .setMaxResults( maxResults ) + .list(); + } + } else { + //noinspection unchecked + result = q.setMaxResults( maxResults ).list(); } - return byC; + return aggregateByCategoryAndValue( result ); + } + + private Map aggregateByCategoryAndValue( List result ) { + return result.stream().collect( Collectors.groupingBy( this::convertRowToCharacteristic, Collectors.summingLong( row -> ( Long ) row[5] ) ) ); } private Characteristic convertRowToCharacteristic( Object[] row ) { @@ -865,8 +867,7 @@ private Characteristic convertRowToCharacteristic( Object[] row ) { } catch ( IllegalArgumentException e ) { evidenceCode = null; } - Characteristic c = Characteristic.Factory.newInstance( null, null, ( String ) row[0], ( String ) row[1], ( String ) row[2], ( String ) row[3], evidenceCode ); - return c; + return Characteristic.Factory.newInstance( null, null, ( String ) row[0], ( String ) row[1], ( String ) row[2], ( String ) row[3], evidenceCode ); } /** @@ -958,22 +959,25 @@ public Map> getArrayDesignsUsed( Collection @Override public Map getTechnologyTypeUsageFrequency() { - Query query = getSessionFactory().getCurrentSession().createQuery( - "select a.technologyType, oa.technologyType, count(distinct ee) from ExpressionExperiment ee " - + "join ee.bioAssays ba " - + "join ba.arrayDesignUsed a " - + "left join ba.originalPlatform oa " - + AclQueryUtils.formAclRestrictionClause( "ee.id" ) + " " - + "and (oa is null or a.technologyType <> oa.technologyType) " // ignore noop switch - + formNonTroubledClause( "ee" ) - + formNonTroubledClause( "a" ) + " " - + "group by a.technologyType, oa.technologyType" ); - AclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); + Query q = getSessionFactory().getCurrentSession() + .createSQLQuery( "select 
AD.TECHNOLOGY_TYPE as TT, count(distinct EE2AD.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN EE2AD " + + "join ARRAY_DESIGN AD on EE2AD.ARRAY_DESIGN_FK = AD.ID " + + EE2CAclQueryUtils.formNativeAclJoinClause( "EE2AD.EXPRESSION_EXPERIMENT_FK" ) + " " + + "where EE2AD.EXPRESSION_EXPERIMENT_FK is not NULL" + + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "EE2AD.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + + formNativeNonTroubledClause( "EE2AD.ARRAY_DESIGN_FK", ArrayDesign.class ) + + formNativeNonTroubledClause( "EE2AD.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ) + + " group by AD.TECHNOLOGY_TYPE" ) + .addScalar( "TT", StandardBasicTypes.STRING ) + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .setCacheable( true ); + EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); //noinspection unchecked - List result = query - .setCacheable( true ) - .list(); - return aggregateTechnologyTypeCounts( result ); + List results = q.list(); + return results.stream().collect( Collectors.groupingBy( row -> TechnologyType.valueOf( ( String ) row[0] ), Collectors.summingLong( row -> ( Long ) row[1] ) ) ); } @Override @@ -981,40 +985,24 @@ public Map getTechnologyTypeUsageFrequency( Collection result = getSessionFactory().getCurrentSession() - .createQuery( "select a.technologyType, oa.technologyType, count(distinct ee) from ExpressionExperiment ee " - + "join ee.bioAssays ba " - + "join ba.arrayDesignUsed a " - + "left join ba.originalPlatform oa " - + "where ee.id in :ids " - + "and (oa is null or a.technologyType <> oa.technologyType) " // ignore noop switch - + formNonTroubledClause( "ee" ) - + formNonTroubledClause( "a" ) + " " - + "group by a.technologyType, oa.technologyType" ) - .setParameterList( "ids", 
eeIds ) - .setCacheable( true ) - .list(); - return aggregateTechnologyTypeCounts( result ); - } - - private Map aggregateTechnologyTypeCounts( List result ) { - Map counts = new HashMap<>(); - for ( Object[] row : result ) { - TechnologyType tt = ( TechnologyType ) row[0]; - TechnologyType originalTt = ( TechnologyType ) row[1]; - Long count = ( Long ) row[2]; - counts.compute( tt, ( k, v ) -> v == null ? count : v + count ); - if ( originalTt != null ) { - counts.compute( originalTt, ( k, v ) -> v == null ? count : v + count ); - } - } - return counts; + Query q = getSessionFactory().getCurrentSession() + .createSQLQuery( "select AD.TECHNOLOGY_TYPE as TT, count(distinct EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN EE2AD " + + "join ARRAY_DESIGN AD on EE2AD.ARRAY_DESIGN_FK = AD.ID " + + "where EE2AD.EXPRESSION_EXPERIMENT_FK in (:ids) " + + "group by AD.TECHNOLOGY_TYPE" ) + .addScalar( "TT", StandardBasicTypes.STRING ) + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .setCacheable( true ); + return streamByBatch( q, "ids", eeIds, getBatchSize(), Object[].class ) + .collect( Collectors.groupingBy( row -> TechnologyType.valueOf( ( String ) row[0] ), Collectors.summingLong( row -> ( Long ) row[1] ) ) ); } @Override public Map getArrayDesignsUsageFrequency( int maxResults ) { - return getPlatformsUsageFrequency( null, false, maxResults ); + return getPlatformsUsageFrequency( false, maxResults ); } @Override @@ -1024,7 +1012,7 @@ public Map getArrayDesignsUsageFrequency( Collection ee @Override public Map getOriginalPlatformsUsageFrequency( int maxResults ) { - return getPlatformsUsageFrequency( null, true, maxResults ); + return getPlatformsUsageFrequency( true, maxResults ); } @Override @@ -1032,25 +1020,21 @@ public Map getOriginalPlatformsUsageFrequency( Collection 
getPlatformsUsageFrequency( @Nullable Collection eeIds, boolean original, int maxResults ) { - if ( eeIds != null && eeIds.isEmpty() ) { - return Collections.emptyMap(); - } + private Map getPlatformsUsageFrequency( boolean original, int maxResults ) { Query query = getSessionFactory().getCurrentSession() - .createSQLQuery( "select ad.*, count(distinct i.ID) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " - + "join INVESTIGATION i on i.ID = ee2ad.EXPRESSION_EXPERIMENT_FK " - + "join CURATION_DETAILS eecd on eecd.ID = i.CURATION_DETAILS_FK " + .createSQLQuery( "select ad.*, count(distinct ee2ad.EXPRESSION_EXPERIMENT_FK) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + "join ARRAY_DESIGN ad on ee2ad.ARRAY_DESIGN_FK = ad.ID " - + "join CURATION_DETAILS adcd on adcd.ID = ad.CURATION_DETAILS_FK " + EE2CAclQueryUtils.formNativeAclJoinClause( "ee2ad.EXPRESSION_EXPERIMENT_FK" ) + " " - + "where not eecd.TROUBLED and not adcd.TROUBLED " - + "and ee2ad.IS_ORIGINAL_PLATFORM = :original " + + "where ee2ad.IS_ORIGINAL_PLATFORM = :original" // exclude noop switch - + ( original ? " and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM) " : "" ) - + ( eeIds != null ? "and ee2ad.EXPRESSION_EXPERIMENT_FK in :ids " : "" ) + + ( original ? 
" and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM)" : "" ) + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " - + "group by ad.ID " - + "order by EE_COUNT desc" ) + // exclude troubled platforms or experiments for non-admins + + formNativeNonTroubledClause( "ee2ad.ARRAY_DESIGN_FK", ArrayDesign.class ) + + formNativeNonTroubledClause( "ee2ad.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ) + + " group by ad.ID " + // no need to sort results if limiting, we're collecting in a map + + ( maxResults > 0 ? "order by EE_COUNT desc" : "" ) ) .addEntity( ArrayDesign.class ) .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) // ensures that the cache is invalidated when the ee2ad table is regenerated @@ -1059,37 +1043,80 @@ private Map getPlatformsUsageFrequency( @Nullable Collection< .addSynchronizedEntityClass( ExpressionExperiment.class ) .addSynchronizedEntityClass( ArrayDesign.class ); query.setParameter( "original", original ); - if ( eeIds != null ) { - query.setParameterList( "ids", eeIds ); - } EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); + query.setCacheable( true ); + List result; //noinspection unchecked - List result = query - .setCacheable( true ) + result = query .setMaxResults( maxResults ) .list(); return result.stream().collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ); } + private Map getPlatformsUsageFrequency( Collection eeIds, boolean original, int maxResults ) { + if ( eeIds.isEmpty() ) { + return Collections.emptyMap(); + } + // exclude noop switch + // no need to sort results if limiting, we're collecting in a map + 
Query query = getSessionFactory().getCurrentSession() + .createSQLQuery( "select ad.*, count(distinct ee2ad.EXPRESSION_EXPERIMENT_FK) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + + "join ARRAY_DESIGN ad on ee2ad.ARRAY_DESIGN_FK = ad.ID " + + "where ee2ad.IS_ORIGINAL_PLATFORM = :original" + // exclude noop switch + + ( original ? " and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM)" : "" ) + + " and ee2ad.EXPRESSION_EXPERIMENT_FK in :ids " + + "group by ad.ID " + // no need to sort results if limiting, we're collecting in a map + + ( maxResults > 0 ? "order by EE_COUNT desc" : "" ) ) + .addEntity( ArrayDesign.class ) + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + // ensures that the cache is invalidated when the ee2ad table is regenerated + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + // ensures that the cache is invalidated when EEs or ADs are added/removed + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ); + query.setParameter( "original", original ); + query.setCacheable( true ); + Stream result; + if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { + result = streamByBatch( query, "ids", eeIds, 2048 ); + if ( maxResults > 0 ) { + // results need to be aggregated and limited + return result + .collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ) + .entrySet().stream() + .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) + .limit( maxResults ) + .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue ) ); + } + } else { + //noinspection unchecked + result = query + .setParameterList( "ids", optimizeParameterList( eeIds ) ) + .setMaxResults( maxResults ) + .list() + .stream(); + } + return result.collect( groupingBy( row -> ( 
ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ); + } + @Override public Map> getAuditEvents( Collection ids ) { - //language=HQL - final String queryString = - "select ee.id, auditEvent from ExpressionExperiment ee inner join ee.auditTrail as auditTrail inner join auditTrail.events as auditEvent " - + " where ee.id in (:ids) "; - - List result = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).list(); + //noinspection unchecked + List result = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee.id, auditEvent from ExpressionExperiment ee " + + "join ee.auditTrail as auditTrail " + + "join auditTrail.events as auditEvent " + + "where ee.id in (:ids) " ) + .setParameterList( "ids", optimizeParameterList( ids ) ) + .list(); Map> eventMap = new HashMap<>(); - for ( Object o : result ) { - Object[] row = ( Object[] ) o; - Long id = ( Long ) row[0]; - AuditEvent event = ( AuditEvent ) row[1]; - - this.addEventsToMap( eventMap, id, event ); + for ( Object[] row : result ) { + this.addEventsToMap( eventMap, ( Long ) row[0], ( AuditEvent ) row[1] ); } // add in expression experiment ids that do not have events. Set // their values to null. 
@@ -1150,48 +1177,44 @@ public Collection getExperimentsWithOutliers() { @Override public Map getLastArrayDesignUpdate( Collection expressionExperiments ) { - //language=HQL - final String queryString = "select ee.id, max(s.lastUpdated) from ExpressionExperiment as ee inner join " - + "ee.bioAssays b inner join b.arrayDesignUsed a join a.curationDetails s " - + " where ee in (:ees) group by ee.id "; - - List res = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ees", expressionExperiments ).list(); - + if ( expressionExperiments.isEmpty() ) { + return Collections.emptyMap(); + } + //noinspection unchecked + List res = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee.id, max(s.lastUpdated) from ExpressionExperiment as ee " + + "inner join ee.bioAssays b " + + "join b.arrayDesignUsed a " + + "join a.curationDetails s " + + "where ee in (:ees) " + + "group by ee.id" ) + .setParameterList( "ees", optimizeIdentifiableParameterList( expressionExperiments ) ) + .list(); assert ( !res.isEmpty() ); - Map result = new HashMap<>(); - for ( Object o : res ) { - Object[] oa = ( Object[] ) o; - Long id = ( Long ) oa[0]; - Date d = ( Date ) oa[1]; - result.put( id, d ); + for ( Object[] row : res ) { + result.put( ( Long ) row[0], ( Date ) row[1] ); } return result; } @Override public Date getLastArrayDesignUpdate( ExpressionExperiment ee ) { - - //language=HQL - final String queryString = "select max(s.lastUpdated) from ExpressionExperiment as ee inner join " - + "ee.bioAssays b inner join b.arrayDesignUsed a join a.curationDetails s " + " where ee = :ee "; - - List res = this.getSessionFactory().getCurrentSession().createQuery( queryString ).setParameter( "ee", ee ) - .list(); - - assert ( !res.isEmpty() ); - - return ( Date ) res.iterator().next(); + return ( Date ) this.getSessionFactory().getCurrentSession() + .createQuery( "select max(s.lastUpdated) from ExpressionExperiment as ee " + + "join ee.bioAssays 
b join b.arrayDesignUsed a join a.curationDetails s " + + "where ee = :ee" ) + .setParameter( "ee", ee ) + .uniqueResult(); } @Override public Map getPerTaxonCount() { + //language=HQL String queryString = "select ee.taxon, count(distinct ee) as EE_COUNT from ExpressionExperiment ee " - + AclQueryUtils.formAclRestrictionClause( "ee.id" ) + " " - + formNonTroubledClause( "ee" ) + " " - + "group by ee.taxon " - + "order by EE_COUNT desc"; + + AclQueryUtils.formAclRestrictionClause( "ee.id" ) + + formNonTroubledClause( "ee", ExpressionExperiment.class ) + + " group by ee.taxon"; Query query = this.getSessionFactory().getCurrentSession().createQuery( queryString ); @@ -1208,20 +1231,17 @@ public Map getPerTaxonCount() { } @Override - public Map getPerTaxonCount( List ids ) { + public Map getPerTaxonCount( Collection ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } - //noinspection unchecked - List list = this.getSessionFactory().getCurrentSession().createQuery( - "select ee.taxon, count(distinct ee) as EE_COUNT from ExpressionExperiment ee " - + "where ee.id in :eeIds " - + "group by ee.taxon " - + "order by EE_COUNT desc" ) - .setParameterList( "eeIds", ids ) - .list(); - return list.stream() - .collect( Collectors.toMap( row -> ( Taxon ) row[0], row -> ( Long ) row[1] ) ); + Query query = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee.taxon, count(distinct ee) as EE_COUNT from ExpressionExperiment ee " + + "where ee.id in :eeIds " + + "group by ee.taxon" ) + .setCacheable( true ); + return streamByBatch( query, "eeIds", ids, getBatchSize(), Object[].class ) + .collect( Collectors.groupingBy( row -> ( Taxon ) row[0], Collectors.summingLong( row -> ( Long ) row[1] ) ) ); } public Map getPopulatedFactorCounts( Collection ids ) { @@ -1239,7 +1259,7 @@ public Map getPopulatedFactorCounts( Collection ids ) { + "ef where e.id in (:ids) group by e.id"; //noinspection unchecked List res = 
this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).list(); + .setParameterList( "ids", optimizeParameterList( ids ) ).list(); for ( Object[] ro : res ) { Long id = ( Long ) ro[0]; @@ -1266,7 +1286,7 @@ public Map getPopulatedFactorCountsExcludeBatch( Collection id //noinspection unchecked List res = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ) // Set ids + .setParameterList( "ids", optimizeParameterList( ids ) ) // Set ids .setParameter( "category", ExperimentalFactorService.BATCH_FACTOR_CATEGORY_NAME ) // Set batch category .setParameter( "name", ExperimentalFactorService.BATCH_FACTOR_NAME ) // set batch name .list(); @@ -1349,7 +1369,7 @@ public Map> getSampleRemovalEvents( Map> result = new HashMap<>(); List r = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ees", expressionExperiments ).list(); + .setParameterList( "ees", optimizeIdentifiableParameterList( expressionExperiments ) ).list(); for ( Object o : r ) { Object[] ol = ( Object[] ) o; @@ -1400,7 +1420,7 @@ public Map getTaxa( Collection bioAssaySets // FIXME: this query cannot be made cacheable because the taxon is not initialized when retrieved from the cache, defeating the purpose of caching altogether //noinspection unchecked List list = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ees", bioAssaySets ) + .setParameterList( "ees", optimizeIdentifiableParameterList( bioAssaySets ) ) .list(); //noinspection unchecked @@ -1466,7 +1486,7 @@ private Map> getExpressionExperimentDetai + "where ee.id in :eeIds " // FIXME: apply ACLs, other parts or platform might be private + "group by ee, ad, op, oe" ) - .setParameterList( "eeIds", expressionExperimentIds ) + .setParameterList( "eeIds", optimizeParameterList( expressionExperimentIds ) ) .setCacheable( cacheable ) .list(); return 
results.stream().collect( @@ -1490,7 +1510,7 @@ public List loadWithRelationsAndCache( List ids ) { + "left join s.lastTroubledEvent as eTrbl " + "left join ee.geeq as geeq " + "where ee.id in :ids" ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .setCacheable( true ) // this transformer performs initialization of cached results .setResultTransformer( getEntityTransformer() ) @@ -2204,14 +2224,16 @@ private void populateArrayDesignCount( Collection results = getSessionFactory().getCurrentSession() - .createQuery( "select ee.id, count(distinct ba.arrayDesignUsed) from ExpressionExperiment ee left join ee.bioAssays as ba where ee.id in (:ids) group by ee" ) - .setParameterList( "ids", EntityUtils.getIds( eevos ) ) - .list(); - Map adCountById = results.stream().collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); + Query q = getSessionFactory().getCurrentSession() + .createQuery( "select ee.id, count(distinct ba.arrayDesignUsed) from ExpressionExperiment ee " + + "join ee.bioAssays as ba " + + "where ee.id in (:ids) " + + "group by ee" ) + .setCacheable( true ); + Map adCountById = streamByBatch( q, "ids", EntityUtils.getIds( eevos ), 2048, Object[].class ) + .collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); for ( ExpressionExperimentValueObject eevo : eevos ) { - eevo.setArrayDesignCount( adCountById.get( eevo.getId() ) ); + eevo.setArrayDesignCount( adCountById.getOrDefault( eevo.getId(), 0L ) ); } } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index 89d4e01f61..82a67557a4 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -146,7 +146,7 @@ ExpressionExperiment addRawVectors( ExpressionExperiment eeToUpdate, List loadIdsWithCache( @Nullable Filters filters, @Nullable Sort sort ); - long countWithCache( @Nullable Filters filters ); + long countWithCache( @Nullable Filters filters, @Nullable Set extraIds ); @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_VALUE_OBJECT_COLLECTION_READ" }) Slice loadValueObjectsWithCache( @Nullable Filters filters, @Nullable Sort sort, int offset, int limit ); @@ -263,13 +263,11 @@ ExpressionExperiment addRawVectors( ExpressionExperiment eeToUpdate, /** * Apply ontological inference to augment a filter with additional terms. - * @param mentionedTermUris if non-null, all the terms explicitly mentioned in the filters are added to the - * collection. The returned filter might contain terms that have been inferred. + * @param mentionedTerms if non-null, all the terms explicitly mentioned in the filters are added to the collection. + * The returned filter might contain terms that have been inferred. */ Filters getFiltersWithInferredAnnotations( Filters f, @Nullable Collection mentionedTerms ); - Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ); - @Value class CharacteristicWithUsageStatisticsAndOntologyTerm { /** @@ -302,6 +300,18 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { */ String UNCATEGORIZED = ExpressionExperimentDao.UNCATEGORIZED; + /** + * Obtain category usage frequency for datasets matching the given filter. 
+ * + * @param filters filters restricting the terms to a given set of datasets + * @param excludedCategoryUris ensure that the given category URIs are excluded + * @param excludedTermUris ensure that the given term URIs and their sub-terms (as per {@code subClassOf} relation) + * are excluded; this requires relevant ontologies to be loaded in {@link ubic.gemma.core.ontology.OntologyService}. + * @param retainedTermUris ensure that the given terms are retained (overrides any exclusion from minFrequency and excludedTermUris) + * @param maxResults maximum number of results to return + */ + Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ); + /** * Obtain annotation usage frequency for datasets matching the given filters. *

@@ -311,18 +321,18 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { * if new terms are attached. * * @param filters filters restricting the terms to a given set of datasets - * @param maxResults maximum number of results to return - * @param minFrequency minimum occurrences of a term to be included in the results * @param category a category to restrict annotations to, or null to include all categories - * @param excludedCategoryUris ensure that the given categories are excluded - * @param excludedTermUris ensure that the given terms and their sub-terms (as per {@code subClassOf} relation) + * @param excludedCategoryUris ensure that the given category URIs are excluded + * @param excludedTermUris ensure that the given term URIs and their sub-terms (as per {@code subClassOf} relation) * are excluded; this requires relevant ontologies to be loaded in {@link ubic.gemma.core.ontology.OntologyService}. + * @param minFrequency minimum occurrences of a term to be included in the results * @param retainedTermUris ensure that the given terms are retained (overrides any exclusion from minFrequency and excludedTermUris) + * @param maxResults maximum number of results to return * @return mapping annotations grouped by category and term (URI or value if null) to their number of occurrences in - * the matched datasets + * the matched datasets and ordered in descending number of associated experiments * @see ExpressionExperimentDao#getAnnotationsUsageFrequency(Collection, Class, int, int, String, Collection, Collection, Collection) */ - List getAnnotationsUsageFrequency( @Nullable Filters filters, int maxResults, int minFrequency, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ); + List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection 
excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ); /** * @param expressionExperiment experiment @@ -333,15 +343,16 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_SECURABLE_READ" }) Collection getArrayDesignsUsed( BioAssaySet expressionExperiment ); - Map getTechnologyTypeUsageFrequency( @Nullable Filters filters ); + Map getTechnologyTypeUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ); /** * Calculate the usage frequency of platforms by the datasets matching the provided filters. * * @param filters a set of filters to be applied as per {@link #load(Filters, Sort, int, int)} + * @param extraIds * @param maxResults the maximum of results, or unlimited if less than 1 */ - Map getArrayDesignUsedOrOriginalPlatformUsageFrequency( @Nullable Filters filters, int maxResults ); + Map getArrayDesignUsedOrOriginalPlatformUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, int maxResults ); /** * Calculate the usage frequency of taxa by the datasets matching the provided filters. @@ -350,7 +361,7 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { * * @see #getPerTaxonCount() */ - Map getTaxaUsageFrequency( @Nullable Filters filters ); + Map getTaxaUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ); /** * Checks the experiment for a batch confound. 
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 7bd86362a5..9bc4858c92 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -623,11 +623,21 @@ public Filters getFiltersWithInferredAnnotations( Filters f, @Nullable Collectio } // recreate a clause with inferred terms for ( Map.Entry> e : termUrisBySubClause.entrySet() ) { - Collection termAndChildrenUris = new HashSet<>( e.getValue() ); Set terms = ontologyService.getTerms( e.getValue() ); + Set termAndChildrenUris = new TreeSet<>( String.CASE_INSENSITIVE_ORDER ); + termAndChildrenUris.addAll( e.getValue() ); termAndChildrenUris.addAll( ontologyService.getChildren( terms, false, true ).stream() .map( OntologyTerm::getUri ) .collect( Collectors.toList() ) ); + if ( termAndChildrenUris.size() > QueryUtils.MAX_PARAMETER_LIST_SIZE ) { + log.warn( String.format( "There too many terms for the clause %s, will pick top %d terms.", + e.getKey().getOriginalProperty(), QueryUtils.MAX_PARAMETER_LIST_SIZE ) ); + termAndChildrenUris = termAndChildrenUris.stream() + // favour terms that are mentioned in the filter + .sorted( Comparator.comparing( e.getValue()::contains, Comparator.reverseOrder() ) ) + .limit( QueryUtils.MAX_PARAMETER_LIST_SIZE ) + .collect( Collectors.toSet() ); + } if ( mentionedTerms != null ) { mentionedTerms.addAll( terms ); } @@ -705,7 +715,12 @@ public List loadIdsWithCache( @Nullable Filters filters, @Nullable Sort so @Override @Transactional(readOnly = true) - public long countWithCache( @Nullable Filters filters ) { + public long countWithCache( @Nullable Filters filters, @Nullable Set extraIds ) { + if ( 
extraIds != null ) { + List eeIds = loadIdsWithCache( filters, null ); + eeIds.retainAll( extraIds ); + return eeIds.size(); + } return expressionExperimentDao.countWithCache( filters ); } @@ -727,17 +742,20 @@ private static class SubClauseKey { @Override @Transactional(readOnly = true) - public Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { - List eeIds; + public Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ) { + Collection eeIds; if ( filters == null || filters.isEmpty() ) { - eeIds = null; + eeIds = extraIds; } else { eeIds = expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + eeIds.retainAll( extraIds ); + } } if ( excludedTermUris != null ) { excludedTermUris = inferTermsUris( excludedTermUris ); } - return expressionExperimentDao.getCategoriesUsageFrequency( eeIds, excludedCategoryUris, excludedTermUris, retainedTermUris ); + return expressionExperimentDao.getCategoriesUsageFrequency( eeIds, excludedCategoryUris, excludedTermUris, retainedTermUris, maxResults ); } /** @@ -746,19 +764,23 @@ public Map getCategoriesUsageFrequency( @Nullable Filters */ @Override @Transactional(readOnly = true) - public List getAnnotationsUsageFrequency( @Nullable Filters filters, int maxResults, int minFrequency, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + public List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ) { 
if ( excludedTermUris != null ) { excludedTermUris = inferTermsUris( excludedTermUris ); } - Map result; + Collection eeIds; if ( filters == null || filters.isEmpty() ) { - result = expressionExperimentDao.getAnnotationsUsageFrequency( null, null, maxResults, minFrequency, category, excludedCategoryUris, excludedTermUris, retainedTermUris ); + eeIds = extraIds; } else { - List eeIds = expressionExperimentDao.loadIdsWithCache( filters, null ); - result = expressionExperimentDao.getAnnotationsUsageFrequency( eeIds, null, maxResults, minFrequency, category, excludedCategoryUris, excludedTermUris, retainedTermUris ); + eeIds = expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + eeIds.retainAll( extraIds ); + } } + Map result = expressionExperimentDao.getAnnotationsUsageFrequency( eeIds, null, maxResults, minFrequency, category, excludedCategoryUris, excludedTermUris, retainedTermUris ); + List resultWithParents = new ArrayList<>( result.size() ); // gather all the values and categories @@ -766,14 +788,15 @@ public List getAnnotationsUsag .flatMap( c -> Stream.of( c.getValueUri(), c.getCategoryUri() ) ) .filter( Objects::nonNull ) .collect( Collectors.toSet() ); - // TODO: handle more than one term per URI Map> termByUri = ontologyService.getTerms( uris ).stream() + .filter( t -> t.getUri() != null ) // should never occur, but better be safe than sorry .collect( Collectors.groupingBy( OntologyTerm::getUri, Collectors.toSet() ) ); for ( Map.Entry entry : result.entrySet() ) { Characteristic c = entry.getKey(); OntologyTerm term; if ( c.getValueUri() != null && termByUri.containsKey( c.getValueUri() ) ) { + // TODO: handle more than one term per URI term = termByUri.get( c.getValueUri() ).iterator().next(); } else if ( c.getCategoryUri() != null && termByUri.containsKey( c.getCategoryUri() ) ) { term = new OntologyTermSimpleWithCategory( c.getValueUri(), c.getValue(), termByUri.get( c.getCategoryUri() ).iterator().next() ); @@ -784,6 
+807,9 @@ public List getAnnotationsUsag resultWithParents.add( new CharacteristicWithUsageStatisticsAndOntologyTerm( entry.getKey(), entry.getValue(), term ) ); } + // sort in descending order + resultWithParents.sort( Comparator.comparing( CharacteristicWithUsageStatisticsAndOntologyTerm::getNumberOfExpressionExperiments, Comparator.reverseOrder() ) ); + return resultWithParents; } @@ -833,7 +859,7 @@ public Collection getParents( boolean direct, boolean includeAddit return Collections.singleton( categoryTerm ); } else { // combine the direct parents + all the parents from the parents - return Stream.concat( Stream.of( categoryTerm ), Stream.of( categoryTerm ).flatMap( t -> getParents( false, includeAdditionalProperties, keepObsoletes ).stream() ) ) + return Stream.concat( Stream.of( categoryTerm ), Stream.of( categoryTerm ).flatMap( t -> t.getParents( false, includeAdditionalProperties, keepObsoletes ).stream() ) ) .collect( Collectors.toSet() ); } } @@ -852,26 +878,43 @@ public Collection getArrayDesignsUsed( final BioAssaySet expression @Override @Transactional(readOnly = true) - public Map getTechnologyTypeUsageFrequency( @Nullable Filters filters ) { + public Map getTechnologyTypeUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ) { if ( filters == null || filters.isEmpty() ) { - return expressionExperimentDao.getTechnologyTypeUsageFrequency(); + if ( extraIds != null ) { + return expressionExperimentDao.getTechnologyTypeUsageFrequency( extraIds ); + } else { + return expressionExperimentDao.getTechnologyTypeUsageFrequency(); + } } else { List ids = this.expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + ids.retainAll( extraIds ); + } return expressionExperimentDao.getTechnologyTypeUsageFrequency( ids ); } } @Override @Transactional(readOnly = true) - public Map getArrayDesignUsedOrOriginalPlatformUsageFrequency( @Nullable Filters filters, int maxResults ) { + public Map 
getArrayDesignUsedOrOriginalPlatformUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, int maxResults ) { Map result; if ( filters == null || filters.isEmpty() ) { - result = new HashMap<>( expressionExperimentDao.getArrayDesignsUsageFrequency( maxResults ) ); - for ( Map.Entry e : expressionExperimentDao.getOriginalPlatformsUsageFrequency( maxResults ).entrySet() ) { - result.compute( e.getKey(), ( k, v ) -> ( v != null ? v : 0L ) + e.getValue() ); + if ( extraIds != null ) { + result = new HashMap<>( expressionExperimentDao.getArrayDesignsUsageFrequency( extraIds, maxResults ) ); + for ( Map.Entry e : expressionExperimentDao.getOriginalPlatformsUsageFrequency( extraIds, maxResults ).entrySet() ) { + result.compute( e.getKey(), ( k, v ) -> ( v != null ? v : 0L ) + e.getValue() ); + } + } else { + result = new HashMap<>( expressionExperimentDao.getArrayDesignsUsageFrequency( maxResults ) ); + for ( Map.Entry e : expressionExperimentDao.getOriginalPlatformsUsageFrequency( maxResults ).entrySet() ) { + result.compute( e.getKey(), ( k, v ) -> ( v != null ? v : 0L ) + e.getValue() ); + } } } else { List ids = this.expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + ids.retainAll( extraIds ); + } result = new HashMap<>( expressionExperimentDao.getArrayDesignsUsageFrequency( ids, maxResults ) ); for ( Map.Entry e : expressionExperimentDao.getOriginalPlatformsUsageFrequency( ids, maxResults ).entrySet() ) { result.compute( e.getKey(), ( k, v ) -> ( v != null ? 
v : 0L ) + e.getValue() ); @@ -891,11 +934,18 @@ public Map getArrayDesignUsedOrOriginalPlatformUsageFrequency @Override @Transactional(readOnly = true) - public Map getTaxaUsageFrequency( @Nullable Filters filters ) { + public Map getTaxaUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ) { if ( filters == null || filters.isEmpty() ) { - return expressionExperimentDao.getPerTaxonCount(); + if ( extraIds != null ) { + return expressionExperimentDao.getPerTaxonCount( extraIds ); + } else { + return expressionExperimentDao.getPerTaxonCount(); + } } else { List ids = this.expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + ids.retainAll( extraIds ); + } return expressionExperimentDao.getPerTaxonCount( ids ); } } @@ -906,6 +956,7 @@ public String getBatchConfound( ExpressionExperiment ee ) { ee = this.thawBioAssays( ee ); if ( !this.checkHasBatchInfo( ee ) ) { + log.info( "Experiment has no batch information, cannot check for confound: " + ee ); return null; } @@ -1041,6 +1092,8 @@ public BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ) { @Transactional(readOnly = true) public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); + BatchEffectDetails.BatchEffectStatistics batchEffectStatistics = beDetails.getBatchEffectStatistics(); + if ( !beDetails.hasBatchInformation() ) { return BatchEffectType.NO_BATCH_INFO; } else if ( beDetails.getHasSingletonBatches() ) { @@ -1055,13 +1108,15 @@ public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { } else if ( beDetails.hasProblematicBatchInformation() ) { // sort of generic return BatchEffectType.PROBLEMATIC_BATCH_INFO_FAILURE; - } else if ( beDetails.getBatchEffectStatistics() == null ) { - return BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE; - } else if ( beDetails.getBatchEffectStatistics().getPvalue() < ExpressionExperimentServiceImpl.BATCH_EFFECT_THRESHOLD ) { - 
// this means there was a batch effect but we couldn't correct it - return BatchEffectType.BATCH_EFFECT_FAILURE; } else { - return BatchEffectType.NO_BATCH_EFFECT_SUCCESS; + if ( batchEffectStatistics == null ) { + return BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE; + } else if ( batchEffectStatistics.getPvalue() < ExpressionExperimentServiceImpl.BATCH_EFFECT_THRESHOLD ) { + // this means there was a batch effect but we couldn't correct it + return BatchEffectType.BATCH_EFFECT_FAILURE; + } else { + return BatchEffectType.NO_BATCH_EFFECT_SUCCESS; + } } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/FactorValueDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/FactorValueDaoImpl.java index 0bac3c746d..6c87900630 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/FactorValueDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/FactorValueDaoImpl.java @@ -37,6 +37,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** *

* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -88,7 +90,7 @@ public Map loadIdsWithNumberOfOldStyleCharacteristics( Set //noinspection unchecked result = ( List ) this.getSessionFactory().getCurrentSession() .createQuery( "select fv.id, size(fv.oldStyleCharacteristics) from FactorValue fv where fv.id not in :ids group by fv order by id" ) - .setParameterList( "ids", excludedIds ) + .setParameterList( "ids", optimizeParameterList( excludedIds ) ) .list(); } return result.stream().collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Integer ) row[1] ) ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java index 60037a9108..98610cad04 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java @@ -27,7 +27,6 @@ import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import ubic.basecode.util.BatchIterator; import ubic.gemma.model.common.Describable; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.DatabaseEntry; @@ -48,6 +47,9 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * Base Spring DAO Class: is able to create, update, remove, load, and find objects of type Gene. 
* @@ -177,7 +179,7 @@ public Map findByOfficialSymbols( Collection query, Long t //language=HQL final String queryString = "select g from Gene as g join fetch g.taxon t where g.officialSymbol in (:symbols) and t.id = :taxonId"; - for ( Collection batch : new BatchIterator<>( query, GeneDaoImpl.BATCH_SIZE ) ) { + for ( Collection batch : batchParameterList( query, getBatchSize() ) ) { //noinspection unchecked List results = this.getSessionFactory().getCurrentSession() .createQuery( queryString ) @@ -197,7 +199,7 @@ public Map findByNcbiIds( Collection ncbiIds ) { //language=HQL final String queryString = "from Gene g where g.ncbiGeneId in (:ncbi)"; - for ( Collection batch : new BatchIterator<>( ncbiIds, GeneDaoImpl.BATCH_SIZE ) ) { + for ( Collection batch : batchParameterList( ncbiIds, getBatchSize() ) ) { //noinspection unchecked List results = this.getSessionFactory().getCurrentSession() .createQuery( queryString ) @@ -332,8 +334,14 @@ public List loadThawed( Collection ids ) { return result; StopWatch timer = new StopWatch(); timer.start(); - for ( Collection batch : new BatchIterator<>( ids, GeneDaoImpl.BATCH_SIZE ) ) { - result.addAll( this.doLoadThawedLite( batch ) ); + for ( Collection batch : batchParameterList( ids, getBatchSize() ) ) { + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession().createQuery( + "select distinct g from Gene g left join fetch g.aliases left join fetch g.accessions acc " + + "join fetch g.taxon t left join fetch g.products gp left join fetch g.multifunctionality " + + "where g.id in (:gIds)" ) + .setParameterList( "gIds", batch ) + .list() ); } if ( timer.getTime() > 1000 ) { AbstractDao.log.debug( "Load+thawRawAndProcessed " + result.size() + " genes: " + timer.getTime() + "ms" ); @@ -349,8 +357,12 @@ public Collection loadThawedLiter( Collection ids ) { return result; StopWatch timer = new StopWatch(); timer.start(); - for ( Collection batch : new BatchIterator<>( ids, GeneDaoImpl.BATCH_SIZE ) ) 
{ - result.addAll( this.doLoadThawedLiter( batch ) ); + for ( Collection batch : batchParameterList( ids, getBatchSize() ) ) { + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession() + .createQuery( "select g from Gene g join fetch g.taxon t where g.id in (:gIds)" ) + .setParameterList( "gIds", batch ) + .list() ); } if ( timer.getTime() > 1000 ) { AbstractDao.log.debug( "Load+thawRawAndProcessed " + result.size() + " genes: " + timer.getTime() + "ms" ); @@ -391,22 +403,10 @@ public Gene thawAliases( final Gene gene ) { public Collection thawLite( final Collection genes ) { if ( genes.isEmpty() ) return new HashSet<>(); - Collection result = new HashSet<>(); - Collection batch = new HashSet<>(); - - for ( Gene g : genes ) { - batch.add( g ); - if ( batch.size() == GeneDaoImpl.BATCH_SIZE ) { - result.addAll( this.loadThawed( EntityUtils.getIds( batch ) ) ); - batch.clear(); - } - } - - if ( !batch.isEmpty() ) { - result.addAll( this.loadThawed( EntityUtils.getIds( batch ) ) ); + for ( Collection batch : batchParameterList( EntityUtils.getIds( genes ), getBatchSize() ) ) { + result.addAll( this.loadThawed( batch ) ); } - return result; } @@ -456,7 +456,7 @@ public int removeAll() { if ( !gpIds.isEmpty() ) { removedGeneProductsAccessions = getSessionFactory().getCurrentSession() .createSQLQuery( "delete from DATABASE_ENTRY where GENE_PRODUCT_FK in :gpIds" ) - .setParameterList( "gpIds", gpIds ) + .setParameterList( "gpIds", optimizeParameterList( gpIds ) ) .executeUpdate(); } else { removedGeneProductsAccessions = 0; @@ -472,7 +472,7 @@ public int removeAll() { if ( !gaIds.isEmpty() ) { removedAliases = getSessionFactory().getCurrentSession() .createQuery( "delete from GeneAlias ga where ga.id in :gaIds" ) - .setParameterList( "gaIds", gaIds ) + .setParameterList( "gaIds", optimizeParameterList( gaIds ) ) .executeUpdate(); } else { removedAliases = 0; @@ -488,7 +488,7 @@ public int removeAll() { if ( !plIds.isEmpty() ) { 
removedPhysicalLocations = getSessionFactory().getCurrentSession() .createQuery( "delete from PhysicalLocation pl where pl.id in :plIds" ) - .setParameterList( "plIds", plIds ) + .setParameterList( "plIds", optimizeParameterList( plIds ) ) .executeUpdate(); } else { removedPhysicalLocations = 0; @@ -658,22 +658,6 @@ protected void postProcessValueObjects( List geneValueObjects ) fillMultifunctionalityRank( geneValueObjects ); } - private Collection doLoadThawedLite( Collection ids ) { - //noinspection unchecked - return this.getSessionFactory().getCurrentSession().createQuery( - "select distinct g from Gene g left join fetch g.aliases left join fetch g.accessions acc " - + "join fetch g.taxon t left join fetch g.products gp left join fetch g.multifunctionality " - + "where g.id in (:gIds)" ).setParameterList( "gIds", ids ).list(); - } - - private Collection doLoadThawedLiter( Collection ids ) { - //noinspection unchecked - return this.getSessionFactory().getCurrentSession() - .createQuery( "select g from Gene g join fetch g.taxon t where g.id in (:gIds)" ) - .setParameterList( "gIds", ids ) - .list(); - } - /** * Returns genes in the region. 
*/ @@ -726,7 +710,7 @@ private void fillAliases( List geneValueObjects ) { //noinspection unchecked List results = getSessionFactory().getCurrentSession() .createQuery( "select g.id, a.alias from Gene g join g.aliases a where g.id in :ids" ) - .setParameterList( "ids", geneValueObjects.stream().map( GeneValueObject::getId ).collect( Collectors.toSet() ) ) + .setParameterList( "ids", optimizeParameterList( EntityUtils.getIds( geneValueObjects ) ) ) .list(); Map> aliasByGeneId = results.stream() .collect( Collectors.groupingBy( @@ -749,7 +733,7 @@ private void fillAccessions( List geneValueObjects ) { //noinspection unchecked List results = getSessionFactory().getCurrentSession() .createQuery( "select g.id, a from Gene g join g.accessions a where g.id in :ids" ) - .setParameterList( "ids", geneValueObjects.stream().map( GeneValueObject::getId ).collect( Collectors.toSet() ) ) + .setParameterList( "ids", optimizeParameterList( EntityUtils.getIds( geneValueObjects ) ) ) .list(); Map> accessionsByGeneId = results.stream() .collect( Collectors.groupingBy( @@ -786,7 +770,7 @@ private void fillMultifunctionalityRank( List geneValueObjects //noinspection unchecked List results = getSessionFactory().getCurrentSession() .createQuery( "select g.id, g.multifunctionality.rank from Gene g where g.id in :ids" ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .list(); Map result = results.stream() .collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Double ) row[1] ) ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/biosequence/BioSequenceDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/biosequence/BioSequenceDaoImpl.java index f7db7346e3..eb5847f549 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/biosequence/BioSequenceDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/biosequence/BioSequenceDaoImpl.java @@ -32,11 
+32,12 @@ import ubic.gemma.persistence.service.AbstractDao; import ubic.gemma.persistence.service.AbstractVoEnabledDao; import ubic.gemma.persistence.util.BusinessKey; -import ubic.gemma.persistence.util.EntityUtils; import javax.annotation.Nullable; import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.batchIdentifiableParameterList; + /** * @author pavlidis * @see ubic.gemma.model.genome.biosequence.BioSequence @@ -97,30 +98,19 @@ public BioSequence findByAccession( DatabaseEntry databaseEntry ) { public Map> findByGenes( Collection genes ) { if ( genes == null || genes.isEmpty() ) return new HashMap<>(); - Map> results = new HashMap<>(); - - int batchSize = 500; - - if ( genes.size() <= batchSize ) { - this.findByGenesBatch( genes, results ); - return results; - } - - Collection batch = new HashSet<>(); - - for ( Gene gene : genes ) { - batch.add( gene ); - if ( batch.size() == batchSize ) { - this.findByGenesBatch( genes, results ); - batch.clear(); + for ( Collection batch : batchIdentifiableParameterList( genes, 500 ) ) { + //noinspection unchecked + List qr = this.getSessionFactory().getCurrentSession().createQuery( + "select distinct gene, bs from Gene gene " + + "join fetch gene.products ggp, BioSequence bs " + + "join bs.bioSequence2GeneProduct bs2gp join bs2gp.geneProduct bsgp " + + "where ggp = bsgp and gene in (:genes)" ) + .setParameterList( "genes", batch ).list(); + for ( Object[] row : qr ) { + results.computeIfAbsent( ( Gene ) row[0], k -> new HashSet<>() ).add( ( BioSequence ) row[1] ); } } - - if ( !batch.isEmpty() ) { - this.findByGenesBatch( genes, results ); - } - return results; } @@ -155,18 +145,15 @@ public Collection thaw( final Collection bioSequences return new HashSet<>(); Collection result = new HashSet<>(); - Collection batch = new HashSet<>(); - - for ( BioSequence g : bioSequences ) { - batch.add( g ); - if ( batch.size() == 100 ) { - result.addAll( this.doThawBatch( batch ) ); - batch.clear(); - } - } - - if ( 
!batch.isEmpty() ) { - result.addAll( this.doThawBatch( batch ) ); + for ( Collection batch : batchIdentifiableParameterList( bioSequences, 100 ) ) { + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession().createQuery( "select b from BioSequence b " + + "left join fetch b.taxon tax left join fetch tax.externalDatabase left join fetch b.sequenceDatabaseEntry s " + + "left join fetch s.externalDatabase" + " left join fetch b.bioSequence2GeneProduct bs2gp " + + "left join fetch bs2gp.geneProduct gp left join fetch gp.gene g " + + "left join fetch g.aliases left join fetch g.accessions where b in (:bs)" ) + .setParameterList( "bs", batch ) + .list() ); } return result; @@ -243,34 +230,6 @@ public BioSequence find( BioSequence bioSequence ) { return ( BioSequence ) result; } - private Collection doThawBatch( Collection batch ) { - //noinspection unchecked - return this.getSessionFactory().getCurrentSession().createQuery( "select b from BioSequence b " - + " left join fetch b.taxon tax left join fetch tax.externalDatabase left join fetch b.sequenceDatabaseEntry s " - + " left join fetch s.externalDatabase" + " left join fetch b.bioSequence2GeneProduct bs2gp " - + " left join fetch bs2gp.geneProduct gp left join fetch gp.gene g" - + " left join fetch g.aliases left join fetch g.accessions where b.id in (:bids)" ) - .setParameterList( "bids", EntityUtils.getIds( batch ) ) - .list(); - } - - private void findByGenesBatch( Collection genes, Map> results ) { - //noinspection unchecked - List qr = this.getSessionFactory().getCurrentSession().createQuery( - "select distinct gene,bs from Gene gene inner join fetch gene.products ggp," - + " BioSequence bs inner join bs.bioSequence2GeneProduct bs2gp inner join bs2gp.geneProduct bsgp" - + " where ggp=bsgp and gene in (:genes)" ) - .setParameterList( "genes", genes ).list(); - for ( Object[] oa : qr ) { - Gene g = ( Gene ) oa[0]; - BioSequence b = ( BioSequence ) oa[1]; - if ( !results.containsKey( g 
) ) { - results.put( g, new HashSet() ); - } - results.get( g ).add( b ); - } - } - private void debug( @Nullable BioSequence query, List results ) { StringBuilder sb = new StringBuilder(); sb.append( "\nMultiple BioSequences found matching query:\n" ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java index 0c2ea712a4..b7e490a1dc 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java @@ -39,6 +39,8 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * Base Spring DAO Class: is able to create, update, remove, load, and find objects of type * ubic.gemma.model.genome.gene.GeneSet. @@ -134,7 +136,7 @@ public List loadValueObjectsByIdsLite( Collect + "left join m.gene.taxon t " + "where g.id in :ids " + "group by g.id" ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .list(); return fillValueObjects( result ); } @@ -280,7 +282,7 @@ private void fillGeneIds( List result ) { //noinspection unchecked List r = getSessionFactory().getCurrentSession() .createQuery( "select g.id, genes.id from GeneSet g join g.members m join m.gene genes where g.id in :ids" ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .list(); Map> geneIdsByGeneSetId = r.stream() .collect( Collectors.groupingBy( row -> ( Long ) row[0], Collectors.mapping( row -> ( Long ) row[1], Collectors.toSet() ) ) ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/AnnotationAssociationDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/AnnotationAssociationDaoImpl.java index 
1a38872791..8c94f1e54b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/AnnotationAssociationDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/AnnotationAssociationDaoImpl.java @@ -32,6 +32,8 @@ import java.util.Collections; import java.util.HashSet; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** * @author paul */ @@ -117,6 +119,6 @@ public Collection find( Collection gps ) { //noinspection unchecked return this.getSessionFactory().getCurrentSession() .createQuery( "select b from AnnotationAssociation b join b.geneProduct gp where gp in (:gps)" ) - .setParameterList( "gps", gps ).list(); + .setParameterList( "gps", optimizeIdentifiableParameterList( gps ) ).list(); } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatAssociationDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatAssociationDaoImpl.java index 2adf92a1dd..7019d86d89 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatAssociationDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatAssociationDaoImpl.java @@ -19,7 +19,9 @@ package ubic.gemma.persistence.service.genome.sequenceAnalysis; import org.apache.commons.lang3.StringUtils; -import org.hibernate.*; +import org.hibernate.Criteria; +import org.hibernate.Hibernate; +import org.hibernate.SessionFactory; import org.hibernate.criterion.Restrictions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; @@ -34,6 +36,8 @@ import java.util.Collections; import java.util.HashSet; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** *

* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -112,7 +116,7 @@ public Collection find( Collection gps ) { Collections.emptySet() : this.getSessionFactory().getCurrentSession() .createQuery( "select b from BlatAssociation b join b.geneProduct gp where gp in (:gps)" ) - .setParameterList( "gps", gps ).list(); + .setParameterList( "gps", optimizeIdentifiableParameterList( gps ) ).list(); } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatResultDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatResultDaoImpl.java index 35c4333b9d..d17571b5bf 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatResultDaoImpl.java @@ -23,19 +23,17 @@ import org.hibernate.SessionFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import org.springframework.transaction.annotation.Transactional; import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.model.genome.sequenceAnalysis.BlatResult; import ubic.gemma.model.genome.sequenceAnalysis.BlatResultValueObject; import ubic.gemma.persistence.service.AbstractVoEnabledDao; import ubic.gemma.persistence.util.BusinessKey; -import ubic.gemma.persistence.util.EntityUtils; -import java.sql.Connection; import java.util.Collection; -import java.util.LinkedHashSet; import java.util.List; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** *

* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -79,7 +77,7 @@ public Collection thaw( Collection blatResults ) { + " left join fetch t.externalDatabase left join fetch qs.sequenceDatabaseEntry s " + " left join fetch s.externalDatabase" + " where b in :blatResults" ) - .setParameterList( "blatResults", blatResults ) + .setParameterList( "blatResults", optimizeIdentifiableParameterList( blatResults ) ) .list(); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/CacheUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/CacheUtils.java index 08903a8b18..7823a2ca28 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/CacheUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/CacheUtils.java @@ -1,14 +1,14 @@ package ubic.gemma.persistence.util; -import lombok.Value; import net.sf.ehcache.Ehcache; import org.springframework.cache.Cache; import org.springframework.cache.CacheManager; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; -import java.util.Objects; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Predicate; /** @@ -25,6 +25,29 @@ public static Cache getCache( CacheManager cacheManager, String cacheName ) thro return Objects.requireNonNull( cacheManager.getCache( cacheName ), String.format( "Cache with name %s does not exist.", cacheName ) ); } + public static int getSize( Cache cache ) { + if ( cache.getNativeCache() instanceof Ehcache ) { + return ( ( Ehcache ) cache.getNativeCache() ).getSize(); + } else if ( cache.getNativeCache() instanceof Map ) { + return ( ( Map ) cache.getNativeCache() ).size(); + } else { + return 0; + } + } + + /** + * Check if a cache contains a given key. 
+ */ + public static boolean containsKey( Cache cache, Object key ) { + if ( cache.getNativeCache() instanceof Ehcache ) { + return ( ( Ehcache ) cache.getNativeCache() ).isKeyInCache( key ); + } else if ( cache.getNativeCache() instanceof Map ) { + return ( ( Map ) cache.getNativeCache() ).containsKey( key ); + } else { + return cache.get( key ) != null; + } + } + /** * Obtain the keys of all elements of a cache. */ @@ -55,6 +78,14 @@ public static void evictIf( Cache cache, Predicate predicate ) { } } + public static Lock acquireReadLock( Cache cache, Object key ) { + if ( cache.getNativeCache() instanceof Ehcache ) { + return new EhcacheLock( ( Ehcache ) cache.getNativeCache(), key, true ); + } else { + return new CacheLock( cache, key, true ); + } + } + /** * Acquire an exclusive write lock on the given key in the cache. *

@@ -62,9 +93,9 @@ public static void evictIf( Cache cache, Predicate predicate ) { */ public static Lock acquireWriteLock( Cache cache, Object key ) { if ( cache.getNativeCache() instanceof Ehcache ) { - return new EhcacheWriteLock( ( Ehcache ) cache.getNativeCache(), key ); + return new EhcacheLock( ( Ehcache ) cache.getNativeCache(), key, false ); } else { - return new NoopWriteLock(); + return new CacheLock( cache, key, false ); } } @@ -74,15 +105,16 @@ public interface Lock extends AutoCloseable { void close(); } - @Value - private static class EhcacheWriteLock implements Lock { + private static class EhcacheLock implements Lock { - Ehcache cache; - Object key; + private final Ehcache cache; + private final Object key; + private final boolean readOnly; - public EhcacheWriteLock( Ehcache cache, Object key ) { + public EhcacheLock( Ehcache cache, Object key, boolean readOnly ) { this.cache = cache; this.key = key; + this.readOnly = readOnly; lock(); } @@ -92,19 +124,60 @@ public void close() { } private void lock() { - cache.acquireWriteLockOnKey( key ); + if ( readOnly ) { + cache.acquireReadLockOnKey( key ); + } else { + cache.acquireWriteLockOnKey( key ); + } } private void unlock() { - cache.releaseWriteLockOnKey( key ); + if ( readOnly ) { + cache.releaseReadLockOnKey( key ); + } else { + cache.releaseWriteLockOnKey( key ); + } } } - private static class NoopWriteLock implements Lock { + private static class CacheLock implements Lock { + + /** + * Using a WeakHashMap to avoid memory leaks when a cache key is no longer used. 
+ */ + private static final Map> lockByKey = new WeakHashMap<>(); + + private final ReadWriteLock lock; + private final boolean readOnly; + + public CacheLock( Cache cache, Object key, boolean readOnly ) { + synchronized ( lockByKey ) { + this.lock = lockByKey.computeIfAbsent( cache, k -> new WeakHashMap<>() ) + .computeIfAbsent( key, k -> new ReentrantReadWriteLock() ); + } + this.readOnly = readOnly; + lock(); + } @Override public void close() { - // noop + unlock(); + } + + private void lock() { + if ( readOnly ) { + lock.readLock().lock(); + } else { + lock.writeLock().lock(); + } + } + + private void unlock() { + if ( readOnly ) { + lock.readLock().unlock(); + } else { + lock.writeLock().unlock(); + } } } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/CommonQueries.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/CommonQueries.java index 2050865c40..153133ed4b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/CommonQueries.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/CommonQueries.java @@ -31,7 +31,9 @@ import java.util.*; +import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_BATCH_SIZE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.*; /** * Contains methods to perform 'common' queries that are needed across DAOs. 
@@ -58,7 +60,7 @@ public static Map> getArrayDesignsUsed( Collection + "ee.bioAssays b inner join b.arrayDesignUsed ad fetch all properties where ee.id in (:ees)"; org.hibernate.Query queryObject = session.createQuery( eeAdQuery ); - queryObject.setParameterList( "ees", ees ); + queryObject.setParameterList( "ees", optimizeParameterList( ees ) ); queryObject.setReadOnly( true ); queryObject.setFlushMode( FlushMode.MANUAL ); @@ -93,7 +95,7 @@ public static Map> getArrayDesignsUsedEEMap( Collection> getArrayDesignsUsedEEMap( Collection possibleEEsubsets = ListUtils.removeAll( ees, ee2ads.keySet() ); // note: CollectionUtils.removeAll has a bug. - qr = session.createQuery( subsetQuery ).setParameterList( "ees", possibleEEsubsets ).list(); + qr = session.createQuery( subsetQuery ).setParameterList( "ees", optimizeParameterList( possibleEEsubsets ) ).list(); CommonQueries.addAllAds( ee2ads, qr ); } @@ -233,25 +235,25 @@ private static void addGeneIds( Map> cs2genes, Query quer */ public static Map> getCs2GeneIdMap( Collection genes, Collection arrayDesigns, Session session ) { + if ( genes.isEmpty() || arrayDesigns.isEmpty() ) { + return Collections.emptyMap(); + } - Map> cs2genes = new HashMap<>(); - - String queryString = "SELECT CS AS csid, GENE AS geneId FROM GENE2CS g WHERE g.GENE IN (:geneIds) AND g.AD IN (:ads)"; - SQLQuery queryObject = session.createSQLQuery( queryString ); - queryObject.addScalar( "csid", LongType.INSTANCE ); - queryObject.addScalar( "geneId", LongType.INSTANCE ); - queryObject.setParameterList( "ads", arrayDesigns ); - queryObject.setParameterList( "geneIds", genes ); - queryObject.setReadOnly( true ); - queryObject.addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ); - queryObject.addSynchronizedEntityClass( ArrayDesign.class ); - queryObject.addSynchronizedEntityClass( CompositeSequence.class ); - queryObject.addSynchronizedEntityClass( Gene.class ); - - CommonQueries.addGeneIds( cs2genes, queryObject ); + Query queryObject = 
session.createSQLQuery( "SELECT CS AS csid, GENE AS geneId FROM GENE2CS g WHERE g.GENE IN (:geneIds) AND g.AD IN (:ads)" ) + .addScalar( "csid", LongType.INSTANCE ) + .addScalar( "geneId", LongType.INSTANCE ) + .addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .addSynchronizedEntityClass( CompositeSequence.class ) + .addSynchronizedEntityClass( Gene.class ) + .setParameterList( "ads", optimizeParameterList( arrayDesigns ) ) + .setReadOnly( true ); + Map> cs2genes = new HashMap<>(); + for ( Collection batch : batchParameterList( genes, GENE2CS_BATCH_SIZE ) ) { + CommonQueries.addGeneIds( cs2genes, queryObject.setParameterList( "geneIds", batch ) ); + } return cs2genes; - } public static Map> getCs2GeneMap( Collection genes, @@ -267,8 +269,8 @@ public static Map> getCs2GeneMap( Collection Map> cs2gene = new HashMap<>(); Query queryObject = session.createQuery( csQueryString ); queryObject.setCacheable( true ); - queryObject.setParameterList( "genes", genes ); - queryObject.setParameterList( "ads", arrayDesigns ); + queryObject.setParameterList( "genes", optimizeIdentifiableParameterList( genes ) ); + queryObject.setParameterList( "ads", optimizeIdentifiableParameterList( arrayDesigns ) ); queryObject.setReadOnly( true ); queryObject.setFlushMode( FlushMode.MANUAL ); @@ -297,7 +299,7 @@ public static Map> getCs2GeneMap( Collection Map> cs2gene = new HashMap<>(); org.hibernate.Query queryObject = session.createQuery( csQueryString ); queryObject.setCacheable( true ); - queryObject.setParameterList( "genes", genes ); + queryObject.setParameterList( "genes", optimizeIdentifiableParameterList( genes ) ); queryObject.setReadOnly( true ); queryObject.setFlushMode( FlushMode.MANUAL ); @@ -334,39 +336,39 @@ public static Map> getCs2GeneMapForProbes( Collection(); - Map> cs2genes = new HashMap<>(); - - String queryString = "SELECT CS AS csid, GENE AS geneId FROM GENE2CS g WHERE g.CS IN (:probes) "; - org.hibernate.SQLQuery 
queryObject = session.createSQLQuery( queryString ); - queryObject.addScalar( "csid", LongType.INSTANCE ); - queryObject.addScalar( "geneId", LongType.INSTANCE ); - queryObject.setParameterList( "probes", probes, LongType.INSTANCE ); - queryObject.setReadOnly( true ); - queryObject.addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ); - queryObject.addSynchronizedEntityClass( ArrayDesign.class ); - queryObject.addSynchronizedEntityClass( CompositeSequence.class ); - queryObject.addSynchronizedEntityClass( Gene.class ); - - CommonQueries.addGeneIds( cs2genes, queryObject ); + Query queryObject = session.createSQLQuery( "SELECT CS AS csid, GENE AS geneId FROM GENE2CS g WHERE g.CS IN (:probes) " ) + .addScalar( "csid", LongType.INSTANCE ) + .addScalar( "geneId", LongType.INSTANCE ) + .addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .addSynchronizedEntityClass( CompositeSequence.class ) + .addSynchronizedEntityClass( Gene.class ) + .setReadOnly( true ); + Map> cs2genes = new HashMap<>(); + for ( Collection batch : batchParameterList( probes, GENE2CS_BATCH_SIZE ) ) { + CommonQueries.addGeneIds( cs2genes, queryObject.setParameterList( "probes", batch ) ); + } return cs2genes; } public static Collection filterProbesByPlatform( Collection probes, Collection arrayDesignIds, Session session ) { - assert probes != null && !probes.isEmpty(); - assert arrayDesignIds != null && !arrayDesignIds.isEmpty(); - String queryString = "SELECT CS AS csid FROM GENE2CS WHERE AD IN (:adids) AND CS IN (:probes)"; - org.hibernate.SQLQuery queryObject = session.createSQLQuery( queryString ); - queryObject.addScalar( "csid", LongType.INSTANCE ); - queryObject.setParameterList( "probes", probes, LongType.INSTANCE ); - queryObject.setParameterList( "adids", arrayDesignIds, LongType.INSTANCE ); - queryObject.addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ); - queryObject.addSynchronizedEntityClass( ArrayDesign.class ); - 
queryObject.addSynchronizedEntityClass( CompositeSequence.class ); - queryObject.addSynchronizedEntityClass( Gene.class ); - //noinspection unchecked - return queryObject.list(); + if ( probes.isEmpty() || arrayDesignIds.isEmpty() ) { + return Collections.emptyList(); + } + Query queryObject = session.createSQLQuery( "SELECT CS AS csid FROM GENE2CS WHERE AD IN (:adids) AND CS IN (:probes)" ) + .addScalar( "csid", LongType.INSTANCE ) + .addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .addSynchronizedEntityClass( CompositeSequence.class ) + .addSynchronizedEntityClass( Gene.class ) + .setParameterList( "adids", optimizeParameterList( arrayDesignIds ), LongType.INSTANCE ); + List results = new ArrayList<>(); + for ( Collection batch : batchParameterList( probes, GENE2CS_BATCH_SIZE ) ) { + //noinspection unchecked + results.addAll( queryObject.setParameterList( "probes", batch, LongType.INSTANCE ).list() ); + } + return results; } - } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/EE2CAclQueryUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/EE2CAclQueryUtils.java index 81c4f981b1..a1e6003747 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/EE2CAclQueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/EE2CAclQueryUtils.java @@ -26,14 +26,18 @@ public static String formNativeAclJoinClause( String aoiIdColumn ) { } public static String formNativeAclRestrictionClause( SessionFactoryImplementor sessionFactoryImplementor, String anonymousMaskColumn ) { + return formNativeAclRestrictionClause( sessionFactoryImplementor, anonymousMaskColumn, BasePermission.READ.getMask() ); + } + + public static String formNativeAclRestrictionClause( SessionFactoryImplementor sessionFactoryImplementor, String anonymousMaskColumn, int mask ) { if ( SecurityUtil.isUserAnonymous() ) { SQLFunction bitwiseAnd = 
sessionFactoryImplementor.getSqlFunctionRegistry().findSQLFunction( "bitwise_and" ); - String mask = bitwiseAnd.render( new IntegerType(), Arrays.asList( anonymousMaskColumn, BasePermission.READ.getMask() ), sessionFactoryImplementor ); - return " and " + mask + " <> 0"; + String renderedMask = bitwiseAnd.render( new IntegerType(), Arrays.asList( anonymousMaskColumn, mask ), sessionFactoryImplementor ); + return " and " + renderedMask + " <> 0"; } else if ( SecurityUtil.isUserAdmin() ) { return ""; } else { - return AclQueryUtils.formNativeAclRestrictionClause( sessionFactoryImplementor ); + return AclQueryUtils.formNativeAclRestrictionClause( sessionFactoryImplementor, mask ); } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/EntityUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/EntityUtils.java index b37888600e..cd538f7520 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/EntityUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/EntityUtils.java @@ -36,6 +36,8 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author paul */ @@ -192,7 +194,7 @@ public static void addUserAndGroupParameters( SQLQuery queryObject, SessionFacto Collection groups = sessionFactory.getCurrentSession().createQuery( "select ug.name from UserGroup ug inner join ug.groupMembers memb where memb.userName = :user" ) .setParameter( "user", userName ).list(); - queryObject.setParameterList( "groups", groups ); + queryObject.setParameterList( "groups", optimizeParameterList( groups ) ); } if ( sqlQuery.contains( ":userName" ) ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterCriteriaUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterCriteriaUtils.java index 1df9fb163a..b25a6d6f3d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterCriteriaUtils.java +++ 
b/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterCriteriaUtils.java @@ -9,6 +9,7 @@ import java.util.Objects; import static ubic.gemma.persistence.util.PropertyMappingUtils.formProperty; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * Utilities for integrating {@link Filter} with Hibernate {@link Criteria} API. @@ -87,8 +88,11 @@ private static Criterion formRestrictionClause( Filter filter ) { case greaterOrEq: return Restrictions.ge( property, filter.getRequiredValue() ); case in: - return Restrictions.in( property, ( Collection ) Objects.requireNonNull( filter.getRequiredValue(), - "Required value cannot be null for a collection." ) ); + if ( !( filter.getRequiredValue() instanceof Collection ) ) { + throw new IllegalArgumentException( "Required value must be a non-null collection for the 'in' operator." ); + } + //noinspection rawtypes,unchecked + return Restrictions.in( property, optimizeParameterList( ( Collection ) filter.getRequiredValue() ) ); default: throw new IllegalStateException( "Unexpected operator for filter: " + filter.getOperator() ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterQueryUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterQueryUtils.java index 94084aada7..0355f323c4 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterQueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterQueryUtils.java @@ -6,10 +6,10 @@ import javax.annotation.Nullable; import java.util.Collection; import java.util.List; -import java.util.stream.Collectors; import static java.util.Objects.requireNonNull; import static ubic.gemma.persistence.util.PropertyMappingUtils.formProperty; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * Utilities for integrating {@link Filter} into {@link org.hibernate.Query}. 
@@ -211,9 +211,12 @@ private static void addRestrictionParameters( Query query, @Nullable Filters fil Subquery s = ( Subquery ) requireNonNull( subClause.getRequiredValue() ); addRestrictionParameters( query, Filters.by( s.getFilter() ), i - 1 ); } else if ( subClause.getOperator().equals( Filter.Operator.in ) ) { + if ( !( subClause.getRequiredValue() instanceof Collection ) ) { + throw new IllegalArgumentException( "Required value must be a non-null collection for the 'in' operator." ); + } // order is unimportant for this operation, so we can ensure that it is consistent and therefore cacheable - query.setParameterList( paramName, requireNonNull( ( Collection ) subClause.getRequiredValue(), "Required value cannot be null for the 'in' operator." ) - .stream().sorted().distinct().collect( Collectors.toList() ) ); + //noinspection rawtypes,unchecked + query.setParameterList( paramName, optimizeParameterList( ( Collection ) subClause.getRequiredValue() ) ); } else if ( subClause.getOperator().equals( Filter.Operator.like ) ) { query.setParameter( paramName, escapeLike( ( String ) requireNonNull( subClause.getRequiredValue(), "Required value cannot be null for the 'like' operator." 
) ) + "%" ); } else { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/GenericValueObjectConverter.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/GenericValueObjectConverter.java deleted file mode 100644 index ca4df6e9e8..0000000000 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/GenericValueObjectConverter.java +++ /dev/null @@ -1,67 +0,0 @@ -package ubic.gemma.persistence.util; - -import org.springframework.core.convert.ConverterNotFoundException; -import org.springframework.core.convert.TypeDescriptor; -import org.springframework.core.convert.converter.ConditionalGenericConverter; -import org.springframework.core.convert.converter.Converter; -import ubic.gemma.model.IdentifiableValueObject; -import ubic.gemma.model.common.Identifiable; - -import javax.annotation.Nullable; -import java.util.*; -import java.util.stream.Collectors; - -/** - * Generic value object converter. - *

- * Performs conversion from entity to value object using a provided {@link Converter}. - * - * @author poirigui - */ -public class GenericValueObjectConverter> implements ConditionalGenericConverter { - - private final Converter converter; - private final Set convertibleTypes; - private final TypeDescriptor sourceType; - private final TypeDescriptor sourceCollectionType; - private final TypeDescriptor targetType; - private final TypeDescriptor targetListType; - - public GenericValueObjectConverter( Converter converter, Class fromClazz, Class clazz ) { - this.converter = converter; - Set convertibleTypes = new HashSet<>(); - convertibleTypes.add( new ConvertiblePair( Identifiable.class, IdentifiableValueObject.class ) ); - convertibleTypes.add( new ConvertiblePair( Collection.class, Collection.class ) ); - this.convertibleTypes = Collections.unmodifiableSet( convertibleTypes ); - this.sourceType = TypeDescriptor.valueOf( fromClazz ); - this.sourceCollectionType = TypeDescriptor.collection( Collection.class, this.sourceType ); - this.targetType = TypeDescriptor.valueOf( clazz ); - this.targetListType = TypeDescriptor.collection( List.class, this.targetType ); - } - - @Override - public Set getConvertibleTypes() { - return convertibleTypes; - } - - @Override - public boolean matches( TypeDescriptor sourceType, TypeDescriptor targetType ) { - return sourceType.isAssignableTo( this.sourceType ) && this.targetType.isAssignableTo( targetType ) || - sourceType.isAssignableTo( this.sourceCollectionType ) && this.targetListType.isAssignableTo( targetType ); - } - - @Override - public Object convert( @Nullable Object source, TypeDescriptor sourceType, TypeDescriptor targetType ) { - if ( sourceType.isAssignableTo( this.sourceType ) && this.targetType.isAssignableTo( targetType ) ) { - //noinspection unchecked - return source != null ? 
converter.convert( ( O ) source ) : null; - } - if ( sourceType.isAssignableTo( sourceCollectionType ) && this.targetListType.isAssignableTo( targetType ) ) { - //noinspection unchecked - return source != null ? ( ( Collection ) source ).stream() - .map( converter::convert ) - .collect( Collectors.toList() ) : null; - } - throw new ConverterNotFoundException( sourceType, targetType ); - } -} diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/HibernateUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/HibernateUtils.java new file mode 100644 index 0000000000..4fad0586b7 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/HibernateUtils.java @@ -0,0 +1,34 @@ +package ubic.gemma.persistence.util; + +import lombok.extern.apachecommons.CommonsLog; +import org.hibernate.SessionFactory; +import org.hibernate.engine.spi.SessionFactoryImplementor; +import org.hibernate.metadata.ClassMetadata; +import org.hibernate.persister.entity.AbstractEntityPersister; +import org.springframework.util.ReflectionUtils; + +import java.lang.reflect.Field; + +@CommonsLog +public class HibernateUtils { + + private static final String BATCH_FETCH_SIZE_SETTING = "gemma.hibernate.default_batch_fetch_size"; + + /** + * Obtain the batch fetch size for the given class. 
+ */ + public static int getBatchSize( SessionFactory sessionFactory, ClassMetadata classMetadata ) { + if ( classMetadata instanceof AbstractEntityPersister ) { + Field field = ReflectionUtils.findField( AbstractEntityPersister.class, "batchSize" ); + ReflectionUtils.makeAccessible( field ); + return ( int ) ReflectionUtils.getField( field, classMetadata ); + } else if ( sessionFactory instanceof SessionFactoryImplementor ) { + return ( ( SessionFactoryImplementor ) sessionFactory ).getSettings() + .getDefaultBatchFetchSize(); + } else { + log.warn( String.format( "Could not determine batch size for %s, will fallback to the %s setting.", + classMetadata.getEntityName(), BATCH_FETCH_SIZE_SETTING ) ); + return Settings.getInt( BATCH_FETCH_SIZE_SETTING, -1 ); + } + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngine.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngine.java index e68501361e..8e8e5f1061 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngine.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngine.java @@ -14,8 +14,6 @@ */ package ubic.gemma.persistence.util; -import org.springframework.mail.SimpleMailMessage; - import java.util.Map; /** @@ -23,10 +21,23 @@ */ public interface MailEngine { - void sendAdminMessage( String bodyText, String subject ); + /** + * Return the admin email address used for {@link #sendAdminMessage(String, String)} + */ + String getAdminEmailAddress(); - void send( SimpleMailMessage msg ); + /** + * Send an email message to the administrator. + */ + void sendAdminMessage( String subject, String bodyText ); - void sendMessage( SimpleMailMessage msg, String templateName, Map model ); + /** + * Send a text email message. + */ + void sendMessage( String to, String subject, String body ); + /** + * Send a templated email message. 
+ */ + void sendMessage( String to, String subject, String templateName, Map model ); } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngineImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngineImpl.java index c69908b154..b045dcc44d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngineImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngineImpl.java @@ -18,18 +18,21 @@ */ package ubic.gemma.persistence.util; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.velocity.app.VelocityEngine; import org.apache.velocity.exception.VelocityException; import org.apache.velocity.runtime.RuntimeConstants; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.mail.MailException; import org.springframework.mail.MailSender; import org.springframework.mail.SimpleMailMessage; import org.springframework.stereotype.Component; import org.springframework.ui.velocity.VelocityEngineUtils; +import java.util.Arrays; import java.util.Map; /** @@ -46,49 +49,78 @@ public class MailEngineImpl implements MailEngine { @Autowired private VelocityEngine velocityEngine; + @Value("${gemma.noreply.email}") + private String noreplyEmailAddress; + + @Value("${gemma.admin.email}") + private String adminEmailAddress; + + @Value("${gemma.support.email}") + private String supportEmailAddress; + + @Override + public String getAdminEmailAddress() { + return adminEmailAddress; + } + /** * Sends a message to the gemma administrator as defined in the Gemma.properties file */ @Override - public void sendAdminMessage( String bodyText, String subject ) { - - if ( ( bodyText == null ) && ( subject == null ) ) { - MailEngineImpl.log.warn( "Not sending empty email, both subject and body are null" ); + public 
void sendAdminMessage( String subject, String bodyText ) { + if ( StringUtils.isBlank( adminEmailAddress ) ) { + MailEngineImpl.log.warn( "Not sending email, no admin email is configured." ); return; } - MailEngineImpl.log.info( "Sending email notification to administrator regarding: " + subject ); SimpleMailMessage msg = new SimpleMailMessage(); - msg.setTo( Settings.getAdminEmailAddress() ); - msg.setFrom( Settings.getAdminEmailAddress() ); + msg.setFrom( noreplyEmailAddress ); + msg.setTo( adminEmailAddress ); + // no need to set the reply to support, it's meant for a Gemma admin msg.setSubject( subject ); msg.setText( bodyText ); - this.send( msg ); + send( msg ); + MailEngineImpl.log.info( "Email notification sent to " + Arrays.toString( msg.getTo() ) ); } @Override - public void send( SimpleMailMessage msg ) { - try { - mailSender.send( msg ); - } catch ( MailException ex ) { - // log it and go on - MailEngineImpl.log.error( ex.getMessage(), ex ); - MailEngineImpl.log.debug( ex, ex ); - } + public void sendMessage( String to, String subject, String body ) { + SimpleMailMessage msg = new SimpleMailMessage(); + msg.setTo( to ); + msg.setFrom( noreplyEmailAddress ); + msg.setReplyTo( supportEmailAddress ); + msg.setSubject( subject ); + msg.setText( body ); + send( msg ); } @Override - public void sendMessage( SimpleMailMessage msg, String templateName, Map model ) { - String result = null; - + public void sendMessage( String to, String subject, String templateName, Map model ) { + SimpleMailMessage msg = new SimpleMailMessage(); + msg.setTo( to ); + msg.setFrom( noreplyEmailAddress ); + msg.setReplyTo( supportEmailAddress ); + msg.setSubject( subject ); try { - result = VelocityEngineUtils - .mergeTemplateIntoString( velocityEngine, templateName, RuntimeConstants.ENCODING_DEFAULT, model ); + msg.setText( VelocityEngineUtils.mergeTemplateIntoString( velocityEngine, templateName, + RuntimeConstants.ENCODING_DEFAULT, model ) ); } catch ( VelocityException e ) { - 
e.printStackTrace(); + MailEngineImpl.log.error( e.getMessage(), e ); + return; } + send( msg ); + } - msg.setText( result ); - this.send( msg ); + private void send( SimpleMailMessage msg ) { + if ( StringUtils.isBlank( msg.getSubject() ) || StringUtils.isBlank( msg.getText() ) ) { + MailEngineImpl.log.warn( "Not sending empty email, both subject and body are blank" ); + return; + } + try { + mailSender.send( msg ); + } catch ( MailException ex ) { + // log it and go on + MailEngineImpl.log.error( ex.getMessage(), ex ); + } } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java new file mode 100644 index 0000000000..4671ba0652 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java @@ -0,0 +1,153 @@ +package ubic.gemma.persistence.util; + +import lombok.extern.apachecommons.CommonsLog; +import org.hibernate.Query; +import org.springframework.util.Assert; +import ubic.gemma.core.util.ListUtils; +import ubic.gemma.model.common.Identifiable; + +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Utilities for {@link org.hibernate.Query}. + * @author poirigui + */ +@CommonsLog +public class QueryUtils { + + /** + * Largest parameter list size for which {@link #optimizeParameterList(Collection)} should be used. Past this size, + * no padding will be performed and a warning will be emitted. + */ + public static final int MAX_PARAMETER_LIST_SIZE = 2048; + + /** + * Optimize a given parameter list by sorting, removing duplicates and padding to the next power of two. + *

+ * This is a temporary solution until we update to Hibernate 5.2.18 which introduced {@code hibernate.query.in_clause_parameter_padding}. + * Read more about this topic. + */ + public static > Collection optimizeParameterList( Collection list ) { + if ( list.size() < 2 ) { + return list; + } + List sortedList = list.stream() + .sorted( Comparator.nullsLast( Comparator.naturalOrder() ) ) + .distinct() + .collect( Collectors.toList() ); + if ( sortedList.size() > MAX_PARAMETER_LIST_SIZE ) { + log.warn( String.format( "Optimizing a large parameter list of size %d may have a negative impact on performance, use batchParameterList() instead.", + sortedList.size() ), new Throwable() ); + return list; + } + return ListUtils.padToNextPowerOfTwo( sortedList, sortedList.get( sortedList.size() - 1 ) ); + } + + /** + * Optimize a collection of {@link Identifiable} entities. + * @see #optimizeParameterList(Collection) + */ + public static Collection optimizeIdentifiableParameterList( Collection list ) { + if ( list.size() < 2 ) { + return list; + } + List sortedList = list.stream() + .sorted( Comparator.comparing( Identifiable::getId, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) + .distinct() + .collect( Collectors.toList() ); + if ( sortedList.size() > MAX_PARAMETER_LIST_SIZE ) { + log.warn( String.format( "Optimizing a large parameter list of size %d may have a negative impact on performance, use batchIdentifiableParameterList() instead.", + sortedList.size() ), new Throwable() ); + return list; + } + return ListUtils.padToNextPowerOfTwo( sortedList, sortedList.get( sortedList.size() - 1 ) ); + } + + /** + * Partition a parameter list into a collection of batches of a given size. + *

+ * It is recommended to use a power of two in case the same query is also prepared via + * {@link #optimizeParameterList(Collection)}. This will make it so that the execution plan can be reused. + */ + public static > List> batchParameterList( Collection list, int batchSize ) { + Assert.isTrue( batchSize == -1 || batchSize > 0, "Batch size must be strictly positive or equal to -1." ); + Assert.isTrue( batchSize <= MAX_PARAMETER_LIST_SIZE, "The batch size must not exceed " + MAX_PARAMETER_LIST_SIZE + "." ); + if ( list.isEmpty() ) { + return Collections.emptyList(); + } + List sortedList = list.stream() + .sorted( Comparator.nullsLast( Comparator.naturalOrder() ) ) + .distinct() + .collect( Collectors.toList() ); + return ListUtils.batch( sortedList, batchSize ); + } + + public static List> batchIdentifiableParameterList( Collection list, int batchSize ) { + Assert.isTrue( batchSize == -1 || batchSize > 0, "Batch size must be strictly positive or equal to -1." ); + Assert.isTrue( batchSize <= MAX_PARAMETER_LIST_SIZE, "The batch size must not exceed " + MAX_PARAMETER_LIST_SIZE + "." ); + if ( list.isEmpty() ) { + return Collections.emptyList(); + } + List sortedList = list.stream() + .sorted( Comparator.comparing( Identifiable::getId, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) + .distinct() + .collect( Collectors.toList() ); + return ListUtils.batch( sortedList, batchSize ); + } + + /** + * @see #listByBatch(Query, String, Collection, int, int) + */ + public static , T> List listByBatch( Query query, String batchParam, Collection list, int batchSize ) { + return listByBatch( query, batchParam, list, batchSize, -1 ); + } + + /** + * List the results of a query by a fixed batch size. 
+ * @param query the query + * @param batchParam a parameter of the query for batching + * @param list a collection of values for the batch parameters to retrieve + * @param batchSize the number of elements to fetch in each batch + * @param maxResults maximum number of results to return, or -1 to ignore + */ + public static , T> List listByBatch( Query query, String batchParam, Collection list, int batchSize, int maxResults ) { + List result = new ArrayList<>( list.size() ); + for ( List batch : batchParameterList( list, batchSize ) ) { + int remainingToFetch; + if ( maxResults > 0 ) { + if ( result.size() < maxResults ) { + remainingToFetch = maxResults - result.size(); + } else { + break; + } + } else { + remainingToFetch = -1; + } + query.setParameterList( batchParam, batch ); + query.setMaxResults( remainingToFetch ); + //noinspection unchecked + result.addAll( query.list() ); + } + return result; + } + + /** + * @see #streamByBatch(Query, String, Collection, int) + */ + public static , T> Stream streamByBatch( Query query, String batchParam, Collection list, int batchSize, Class clazz ) { + return streamByBatch( query, batchParam, list, batchSize ); + } + + /** + * Stream the results of a query by a fixed batch size. 
+ * @see #listByBatch(Query, String, Collection, int) + */ + public static , T> Stream streamByBatch( Query query, String batchParam, Collection list, int batchSize ) { + //noinspection unchecked + return batchParameterList( list, batchSize ).stream() + .map( batch -> ( List ) query.setParameterList( batchParam, batch ).list() ) + .flatMap( List::stream ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/Settings.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/Settings.java index 403c563889..f3cf70808a 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/Settings.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/Settings.java @@ -172,10 +172,6 @@ public class Settings { } - public static String getAdminEmailAddress() { - return Settings.getString( "gemma.admin.email" ); - } - /** * @return The local directory where files generated by analyses are stored. It will end in a file separator ("/" on * unix). diff --git a/gemma-core/src/main/resources/default.properties b/gemma-core/src/main/resources/default.properties index 43705db5ee..f424733211 100755 --- a/gemma-core/src/main/resources/default.properties +++ b/gemma-core/src/main/resources/default.properties @@ -27,8 +27,11 @@ gemma.gene2cs.path=${gemma.appdata.home}/DBReports/gene2cs.info # base url for the system, used in formed URLs gemma.hosturl=https://gemma.msl.ubc.ca # 'From' address for system notifications and SMTP server settings -gemma.admin.email=gemma +gemma.admin.email=gemma@chibi.msl.ubc.ca +gemma.noreply.email=pavlab-apps@msl.ubc.ca +gemma.support.email=pavlab-support@msl.ubc.ca mail.host=localhost +mail.protocol=smtp mail.username=XXXXXX mail.password= # CORS @@ -43,15 +46,10 @@ ga.debug=false gemma.db.host=localhost gemma.db.port=3306 gemma.db.name=gemd -gemma.db.driver=com.mysql.cj.jdbc.Driver -gemma.db.url=jdbc:mysql://${gemma.db.host}:${gemma.db.port}/${gemma.db.name}?useSSL=false&rewriteBatchedStatements=true 
+gemma.db.url=jdbc:mysql://${gemma.db.host}:${gemma.db.port}/${gemma.db.name}?useSSL=false # You must override these settings: gemma.db.user=gemmauser gemma.db.password=XXXXXX -# This ensure some basic behaviors of our database -gemma.db.sqlMode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION -# Default timezone for storage of DATETIME that are mapped to exact moments (i.e. java.util.Date) -gemma.db.timezone=America/Vancouver # Maximum size for the connections pool gemma.db.maximumPoolSize=10 ############################################################ @@ -108,11 +106,13 @@ gemma.linearmodels.useR=false ########################################################### # Sequence annotation databases # Parameters for GoldenPath database installations. -gemma.goldenpath.db.driver=${gemma.db.driver} +gemma.goldenpath.db.driver=com.mysql.cj.jdbc.Driver gemma.goldenpath.db.host=${gemma.db.host} +gemma.goldenpath.db.port=${gemma.db.port} +gemma.goldenpath.db.url=jdbc:mysql://${gemma.goldenpath.db.host}:${gemma.goldenpath.db.port}?useSSL=false gemma.goldenpath.db.user=${gemma.db.user} gemma.goldenpath.db.password=${gemma.db.password} -gemma.goldenpath.db.port=${gemma.db.port} +gemma.goldenpath.db.maximumPoolSize=10 gemma.goldenpath.db.human=hg38 gemma.goldenpath.db.mouse=mm39 gemma.goldenpath.db.rat=rn7 @@ -167,7 +167,8 @@ gemma.hibernate.show_sql=false gemma.hibernate.jdbc_fetch_size=128 gemma.hibernate.jdbc_batch_size=32 # Default size for batch-fetching data (adjust as needed, requires more memory!) 
-gemma.hibernate.default_batch_fetch_size=100 +# It's beneficial to use a power of two because the query plan can be shared with other queries +gemma.hibernate.default_batch_fetch_size=128 #coexpression vis/grid properties #controls how many results will be returned per query gene: gemma.coexpressionSearch.maxResultsPerQueryGene=200 @@ -191,12 +192,10 @@ gemma.localTasks.corePoolSize=16 gemma.testdb.host=localhost gemma.testdb.port=3307 gemma.testdb.name=gemdtest -gemma.testdb.url=jdbc:mysql://${gemma.testdb.host}:${gemma.testdb.port}/${gemma.testdb.name}?useSSL=false&rewriteBatchedStatements=true -gemma.testdb.driver=${gemma.db.driver} +gemma.testdb.url=jdbc:mysql://${gemma.testdb.host}:${gemma.testdb.port}/${gemma.testdb.name}?useSSL=false gemma.testdb.user=gemmatest gemma.testdb.password=1234 -gemma.testdb.timezone=${gemma.db.timezone} -gemma.testdb.sqlMode=${gemma.db.sqlMode} +gemma.testdb.maximumPoolSize=10 #the external database id to exclude by default in phenocarta gemma.neurocarta.exluded_database_id=85 # Featured external databases in Gemma Web About page and Gemma REST main endpoint diff --git a/gemma-core/src/main/resources/ehcache.xml b/gemma-core/src/main/resources/ehcache.xml index 1485cb2c83..3751658a61 100644 --- a/gemma-core/src/main/resources/ehcache.xml +++ b/gemma-core/src/main/resources/ehcache.xml @@ -457,8 +457,9 @@ - - + + + diff --git a/gemma-core/src/main/resources/sql/h2/init-entities.sql b/gemma-core/src/main/resources/sql/h2/init-entities.sql new file mode 100644 index 0000000000..9d99a2ff52 --- /dev/null +++ b/gemma-core/src/main/resources/sql/h2/init-entities.sql @@ -0,0 +1,15 @@ +alter table CHARACTERISTIC + add index CHARACTERISTIC_VALUE_URI_VALUE (VALUE_URI, `VALUE`); +alter table CHARACTERISTIC + add index CHARACTERISTIC_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE (CATEGORY_URI, CATEGORY, VALUE_URI, `VALUE`); +alter table CHARACTERISTIC + add index CHARACTERISTIC_PREDICATE_URI_PREDICATE (PREDICATE_URI, PREDICATE); +alter table 
CHARACTERISTIC + add index CHARACTERISTIC_OBJECT_URI_OBJECT (OBJECT_URI, OBJECT); +alter table CHARACTERISTIC + add index CHARACTERISTIC_SECOND_PREDICATE_URI_SECOND_PREDICATE (SECOND_PREDICATE_URI, SECOND_PREDICATE); +alter table CHARACTERISTIC + add index CHARACTERISTIC_SECOND_OBJECT_URI_SECOND_OBJECT (SECOND_OBJECT_URI, SECOND_OBJECT); + +create index EE2C_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (VALUE_URI, `VALUE`); +create index EE2C_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY_URI, CATEGORY, VALUE_URI, `VALUE`); diff --git a/gemma-core/src/main/resources/sql/init-data.sql b/gemma-core/src/main/resources/sql/init-data.sql new file mode 100644 index 0000000000..2786ab788d --- /dev/null +++ b/gemma-core/src/main/resources/sql/init-data.sql @@ -0,0 +1,114 @@ +-- Initialize the database with some scraps of data. See also init-entities.sql and init-acls.sql + +-- all of these are used. +insert into AUDIT_TRAIL VALUES (1); +insert into AUDIT_TRAIL VALUES (2); +insert into AUDIT_TRAIL VALUES (3); + +set @n:=now(); + +-- username=gemmaAgent: id = 2, password = 'XXXXXXXX', audit trail #2, using salt={username} +insert into CONTACT (ID, CLASS, NAME, LAST_NAME, USER_NAME, PASSWORD, ENABLED, EMAIL, PASSWORD_HINT) values (2, 'User', 'gemmaAgent', '', 'gemmaAgent', '2db458c67b4b52bba0184611c302c9c174ce8de4', 1, 'pavlab-support@msl.ubc.ca', 'hint'); + +-- username=administrator: id = 1, password = 'administrator', audit trail #1 using salt=username ('administrator') +insert into CONTACT (ID, CLASS, NAME, LAST_NAME, USER_NAME, PASSWORD, ENABLED, EMAIL, PASSWORD_HINT) values (1, 'User', 'administrator', '', 'administrator', 'b7338dcc17d6b6c199a75540aab6d0506567b980', 1, 'pavlab-support@msl.ubc.ca', 'hint'); + +-- initialize the audit trails +insert into AUDIT_EVENT VALUES (1, @n, 'C', 'From init script', '', 1, NULL, 1); +insert into AUDIT_EVENT VALUES (2, @n, 'C', 'From init script', '', 1, NULL, 2); +insert into 
AUDIT_EVENT VALUES (3, @n, 'C', 'From init script', '', 1, NULL, 3); + + +-- Note that 'Administrators' is a constant set in AuthorityConstants. The names of these groups are defined in UserGroupDao. +insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (1, 'Administrators', 'Users with administrative rights', 1); +insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (2, 'Users', 'Default group for all authenticated users', 2); +insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (3, 'Agents', 'For \'autonomous\' agents that run within the server context, such as scheduled tasks.', 3); +insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (1, 'ADMIN', 1); +insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (2, 'USER', 2); +insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (3, 'AGENT', 3); + +-- make admin in the admin group +insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (1, 1); + +-- add admin to the user group (note that there is no need for a corresponding ACL entry) +insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (2, 1); + +-- add agent to the agent group +insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (3, 2); + +-- taxa +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Homo sapiens','human','9606',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Mus musculus','mouse','10090',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Rattus norvegicus','rat','10116',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE,SECONDARY_NCBI_ID) values ('Saccharomyces cerevisiae','yeast','4932',1,559292); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Danio rerio','zebrafish','7955',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Drosophila 
melanogaster','fly','7227',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Caenorhabditis elegans','worm','6239',1); + +-- external databases + +-- we need a procedure since we have to create an audit trail +-- silly, but this needs to be in a single line because sql-maven-plugin does not deal well with statements containing multiple semi-colons +create procedure add_external_database(in name varchar(255), in description text, in web_uri varchar(255), in ftp_uri varchar(255), in type varchar(255)) begin insert into AUDIT_TRAIL (ID) values (null); insert into EXTERNAL_DATABASE (NAME, DESCRIPTION, WEB_URI, FTP_URI, TYPE, AUDIT_TRAIL_FK) values (name, description, web_uri, ftp_uri, type, last_insert_id()); end; + +-- insert new db we need to track various things +call add_external_database ('PubMed', 'PubMed database from NCBI', 'https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed', 'ftp://ftp.ncbi.nlm.nih.gov/pubmed/', 'LITERATURE'); +-- call add_external_database('GO', 'Gene Ontology database', 'https://www.godatabase.org/dev/database/', 'https://archive.godatabase.org', 'ONTOLOGY'); +call add_external_database('GEO', 'Gene Expression Omnibus', 'https://www.ncbi.nlm.nih.gov/geo/', 'ftp://ftp.ncbi.nih.gov/pub/geo/DATA', 'EXPRESSION'); +call add_external_database('ArrayExpress', 'EBI ArrayExpress', 'https://www.ebi.ac.uk/arrayexpress/', 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/', 'EXPRESSION'); +call add_external_database('Genbank', 'NCBI Genbank', 'https://www.ncbi.nlm.nih.gov/Genbank/index.html', 'ftp://ftp.ncbi.nih.gov/genbank/', 'SEQUENCE'); +call add_external_database('Entrez Gene', 'NCBI Gene database', 'https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene', 'ftp://ftp.ncbi.nih.gov/gene/', 'SEQUENCE'); +call add_external_database('Ensembl', 'EMBL - EBI/Sanger Institute genome annotations', 'https://www.ensembl.org/', 'ftp://ftp.ensembl.org/pub/', 'GENOME'); +call add_external_database('OBO_REL', 'Open 
Biomedical Ontologies Relationships', 'https://www.obofoundry.org/ro/', NULL, 'ONTOLOGY'); +call add_external_database('STRING', 'STRING - Known and Predicted Protein-Protein Interactions', 'https://string-db.org/version_8_2/newstring_cgi/show_network_section.pl?identifiers=', NULL, 'PROTEIN'); +call add_external_database('hg18', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('hg19', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('hg38', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('mm8', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('mm9', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('mm10', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('mm39', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('rn4', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('rn6', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('rn7', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('hg18 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg18/database/', NULL, 'OTHER'); +call add_external_database('hg19 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg19/database/', NULL, 'OTHER'); +call add_external_database('hg38 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/', NULL, 'OTHER'); +call add_external_database('mm8 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm8/database/', NULL, 'OTHER'); +call add_external_database('mm9 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm9/database/', NULL, 'OTHER'); +call add_external_database('mm10 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm10/database/', NULL, 'OTHER'); +call add_external_database('mm39 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm39/database/', NULL, 'OTHER'); +call add_external_database('rn4 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn4/database/', NULL, 'OTHER'); +call 
add_external_database('rn6 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn6/database/', NULL, 'OTHER'); +call add_external_database('rn7 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn7/database/', NULL, 'OTHER'); +call add_external_database('hg38 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); +call add_external_database('mm10 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); +call add_external_database('mm39 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); +call add_external_database('rn7 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); +call add_external_database('gene', NULL, NULL, 'https://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz', 'OTHER'); +call add_external_database('go', NULL, NULL, 'https://ftp.ncbi.nih.gov/gene/DATA/gene2go.gz', 'ONTOLOGY'); +call add_external_database('multifunctionality', NULL, NULL, NULL, 'OTHER'); +call add_external_database('gene2cs', NULL, NULL, NULL, 'OTHER'); + +drop procedure add_external_database; + +create procedure add_external_database_relation(in parent_name varchar(255), in child_name varchar(255)) begin select @parent_id := ID from EXTERNAL_DATABASE where name = parent_name; update EXTERNAL_DATABASE set EXTERNAL_DATABASE_FK = @parent_id where NAME = child_name; end; + +call add_external_database_relation('hg38', 'hg38 annotations'); +call add_external_database_relation('hg19', 'hg19 annotations'); +call add_external_database_relation('hg18', 'hg18 annotations'); +call add_external_database_relation('mm39', 'mm39 annotations'); +call add_external_database_relation('mm10', 'mm10 annotations'); +call add_external_database_relation('mm9', 'mm9 annotations'); +call add_external_database_relation('mm8', 'mm8 annotations'); +call add_external_database_relation('rn7', 'rn7 annotations'); +call add_external_database_relation('rn6', 'rn6 annotations'); +call add_external_database_relation('rn4', 'rn4 annotations'); + +call add_external_database_relation('hg38', 'hg38 RNA-Seq annotations'); 
+call add_external_database_relation('mm39', 'mm39 RNA-Seq annotations'); +call add_external_database_relation('mm10', 'mm10 RNA-Seq annotations'); +call add_external_database_relation('rn7', 'rn7 RNA-Seq annotations'); + +drop procedure add_external_database_relation; + diff --git a/gemma-core/src/main/resources/sql/init-entities.sql b/gemma-core/src/main/resources/sql/init-entities.sql index a706a3fa47..6a23844a3b 100644 --- a/gemma-core/src/main/resources/sql/init-entities.sql +++ b/gemma-core/src/main/resources/sql/init-entities.sql @@ -1,116 +1,68 @@ --- Initialize the database with some scraps of data. See also init-indices.sql and mysql-acegi-acl.sql. - --- all of these are used. -insert into AUDIT_TRAIL VALUES (1); -insert into AUDIT_TRAIL VALUES (2); -insert into AUDIT_TRAIL VALUES (3); - -set @n:=now(); - --- username=gemmaAgent: id = 2, password = 'XXXXXXXX', audit trail #2, using salt={username} -insert into CONTACT (ID, CLASS, NAME, LAST_NAME, USER_NAME, PASSWORD, ENABLED, EMAIL, PASSWORD_HINT) values (2, 'User', 'gemmaAgent', '', 'gemmaAgent', '2db458c67b4b52bba0184611c302c9c174ce8de4', 1, 'pavlab-support@msl.ubc.ca', 'hint'); - --- username=administrator: id = 1, password = 'administrator', audit trail #1 using salt=username ('administrator') -insert into CONTACT (ID, CLASS, NAME, LAST_NAME, USER_NAME, PASSWORD, ENABLED, EMAIL, PASSWORD_HINT) values (1, 'User', 'administrator', '', 'administrator', 'b7338dcc17d6b6c199a75540aab6d0506567b980', 1, 'pavlab-support@msl.ubc.ca', 'hint'); - --- initialize the audit trails -insert into AUDIT_EVENT VALUES (1, @n, 'C', 'From init script', '', 1, NULL, 1); -insert into AUDIT_EVENT VALUES (2, @n, 'C', 'From init script', '', 1, NULL, 2); -insert into AUDIT_EVENT VALUES (3, @n, 'C', 'From init script', '', 1, NULL, 3); - - --- Note that 'Administrators' is a constant set in AuthorityConstants. The names of these groups are defined in UserGroupDao. 
-insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (1, 'Administrators', 'Users with administrative rights', 1); -insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (2, 'Users', 'Default group for all authenticated users', 2); -insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (3, 'Agents', 'For \'autonomous\' agents that run within the server context, such as scheduled tasks.', 3); -insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (1, 'ADMIN', 1); -insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (2, 'USER', 2); -insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (3, 'AGENT', 3); - --- make admin in the admin group -insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (1, 1); - --- add admin to the user group (note that there is no need for a corresponding ACL entry) -insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (2, 1); - --- add agent to the agent group -insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (3, 2); - --- taxa -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Homo sapiens','human','9606',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Mus musculus','mouse','10090',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Rattus norvegicus','rat','10116',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE,SECONDARY_NCBI_ID) values ('Saccharomyces cerevisiae','yeast','4932',1,559292); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Danio rerio','zebrafish','7955',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Drosophila melanogaster','fly','7227',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Caenorhabditis elegans','worm','6239',1); - --- external databases - --- we need a 
procedure since we have to create an audit trail --- silly, but this needs to be in a single line because sql-maven-plugin does not deal well with statements containing multiple semi-colons -create procedure add_external_database(in name varchar(255), in description text, in web_uri varchar(255), in ftp_uri varchar(255), in type varchar(255)) begin insert into AUDIT_TRAIL (ID) values (null); insert into EXTERNAL_DATABASE (NAME, DESCRIPTION, WEB_URI, FTP_URI, TYPE, AUDIT_TRAIL_FK) values (name, description, web_uri, ftp_uri, type, last_insert_id()); end; - --- insert new db we need to track various things -call add_external_database ('PubMed', 'PubMed database from NCBI', 'https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed', 'ftp://ftp.ncbi.nlm.nih.gov/pubmed/', 'LITERATURE'); --- call add_external_database('GO', 'Gene Ontology database', 'https://www.godatabase.org/dev/database/', 'https://archive.godatabase.org', 'ONTOLOGY'); -call add_external_database('GEO', 'Gene Expression Omnibus', 'https://www.ncbi.nlm.nih.gov/geo/', 'ftp://ftp.ncbi.nih.gov/pub/geo/DATA', 'EXPRESSION'); -call add_external_database('ArrayExpress', 'EBI ArrayExpress', 'https://www.ebi.ac.uk/arrayexpress/', 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/', 'EXPRESSION'); -call add_external_database('Genbank', 'NCBI Genbank', 'https://www.ncbi.nlm.nih.gov/Genbank/index.html', 'ftp://ftp.ncbi.nih.gov/genbank/', 'SEQUENCE'); -call add_external_database('Entrez Gene', 'NCBI Gene database', 'https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene', 'ftp://ftp.ncbi.nih.gov/gene/', 'SEQUENCE'); -call add_external_database('Ensembl', 'EMBL - EBI/Sanger Institute genome annotations', 'https://www.ensembl.org/', 'ftp://ftp.ensembl.org/pub/', 'GENOME'); -call add_external_database('OBO_REL', 'Open Biomedical Ontologies Relationships', 'https://www.obofoundry.org/ro/', NULL, 'ONTOLOGY'); -call add_external_database('STRING', 'STRING - Known and Predicted Protein-Protein Interactions', 
'https://string-db.org/version_8_2/newstring_cgi/show_network_section.pl?identifiers=', NULL, 'PROTEIN'); -call add_external_database('hg18', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('hg19', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('hg38', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('mm8', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('mm9', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('mm10', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('mm39', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('rn4', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('rn6', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('rn7', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('hg18 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg18/database/', NULL, 'OTHER'); -call add_external_database('hg19 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg19/database/', NULL, 'OTHER'); -call add_external_database('hg38 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/', NULL, 'OTHER'); -call add_external_database('mm8 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm8/database/', NULL, 'OTHER'); -call add_external_database('mm9 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm9/database/', NULL, 'OTHER'); -call add_external_database('mm10 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm10/database/', NULL, 'OTHER'); -call add_external_database('mm39 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm39/database/', NULL, 'OTHER'); -call add_external_database('rn4 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn4/database/', NULL, 'OTHER'); -call add_external_database('rn6 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn6/database/', NULL, 'OTHER'); -call add_external_database('rn7 annotations', NULL, 
'https://hgdownload.cse.ucsc.edu/goldenpath/rn7/database/', NULL, 'OTHER'); -call add_external_database('hg38 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); -call add_external_database('mm10 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); -call add_external_database('mm39 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); -call add_external_database('rn7 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); -call add_external_database('gene', NULL, NULL, 'https://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz', 'OTHER'); -call add_external_database('go', NULL, NULL, 'https://ftp.ncbi.nih.gov/gene/DATA/gene2go.gz', 'ONTOLOGY'); -call add_external_database('multifunctionality', NULL, NULL, NULL, 'OTHER'); -call add_external_database('gene2cs', NULL, NULL, NULL, 'OTHER'); - -drop procedure add_external_database; - -create procedure add_external_database_relation(in parent_name varchar(255), in child_name varchar(255)) begin select @parent_id := ID from EXTERNAL_DATABASE where name = parent_name; update EXTERNAL_DATABASE set EXTERNAL_DATABASE_FK = @parent_id where NAME = child_name; end; - -call add_external_database_relation('hg38', 'hg38 annotations'); -call add_external_database_relation('hg19', 'hg19 annotations'); -call add_external_database_relation('hg18', 'hg18 annotations'); -call add_external_database_relation('mm39', 'mm39 annotations'); -call add_external_database_relation('mm10', 'mm10 annotations'); -call add_external_database_relation('mm9', 'mm9 annotations'); -call add_external_database_relation('mm8', 'mm8 annotations'); -call add_external_database_relation('rn7', 'rn7 annotations'); -call add_external_database_relation('rn6', 'rn4 annotations'); -call add_external_database_relation('rn4', 'rn6 annotations'); - -call add_external_database_relation('hg38', 'hg38 RNA-Seq annotations'); -call add_external_database_relation('mm39', 'mm39 RNA-Seq annotations'); -call add_external_database_relation('mm10', 'mm10 RNA-Seq annotations'); -call 
add_external_database_relation('rn7', 'rn7 RNA-Seq annotations'); - -drop procedure add_external_database_relation; +-- Add some indices that are not included in the generated gemma-ddl.sql. +-- Some of these are very important for performance + +alter table ACLSID + add index ACLSID_CLASS (class); +alter table INVESTIGATION + add index INVESTIGATION_CLASS (class); +alter table DATABASE_ENTRY + add index acc_ex (ACCESSION, EXTERNAL_DATABASE_FK); +alter table CHROMOSOME_FEATURE + add index CHROMOSOME_FEATURE_CLASS (class); +alter table CHROMOSOME_FEATURE + add index symbol_tax (OFFICIAL_SYMBOL, TAXON_FK); +alter table AUDIT_EVENT_TYPE + add index AUDIT_EVENT_TYPE_CLASS (class); +alter table ANALYSIS + add index ANALYSIS_CLASS (class); + +alter table CHARACTERISTIC + add index CHARACTERISTIC_CLASS (class); + +alter table PROCESSED_EXPRESSION_DATA_VECTOR + add index experimentProcessedVectorProbes (EXPRESSION_EXPERIMENT_FK, DESIGN_ELEMENT_FK); + +alter table DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT + add index resultSetProbes (RESULT_SET_FK, PROBE_FK); +alter table DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT + add index probeResultSets (PROBE_FK, RESULT_SET_FK); + +alter table CONTACT + add index fullname (NAME, LAST_NAME); + +-- should remove the FIRST_GENE_FK and SECOND_GENE_FK indices, but they get given 'random' names. +-- Drop the second_gene_fk constraint. 
+-- alter table HUMAN_GENE_COEXPRESSION drop foreign key FKF9E6557F21D58F19; +-- alter table MOUSE_GENE_COEXPRESSION drop foreign key FKFC61C4F721D58F19; +-- alter table RAT_GENE_COEXPRESSION drop foreign key FKDE59FC7721D58F19; +-- alter table OTHER_GENE_COEXPRESSION drop foreign key FK74B9A3E221D58F19; + +alter table HUMAN_GENE_COEXPRESSION + add index hfgsg (FIRST_GENE_FK, SECOND_GENE_FK); +alter table MOUSE_GENE_COEXPRESSION + add index mfgsg (FIRST_GENE_FK, SECOND_GENE_FK); +alter table RAT_GENE_COEXPRESSION + add index rfgsg (FIRST_GENE_FK, SECOND_GENE_FK); +alter table OTHER_GENE_COEXPRESSION + add index ofgsg (FIRST_GENE_FK, SECOND_GENE_FK); + +-- same for these, should drop the key for EXPERIMENT_FK, manually +alter table HUMAN_EXPERIMENT_COEXPRESSION + add index ECL1EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK); +alter table HUMAN_EXPERIMENT_COEXPRESSION + add constraint ECL1EFK foreign key (EXPERIMENT_FK) references INVESTIGATION (ID); +alter table MOUSE_EXPERIMENT_COEXPRESSION + add index ECL2EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK); +alter table MOUSE_EXPERIMENT_COEXPRESSION + add constraint ECL2EFK foreign key (EXPERIMENT_FK) references INVESTIGATION (ID); +alter table RAT_EXPERIMENT_COEXPRESSION + add index ECL3EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK); +alter table RAT_EXPERIMENT_COEXPRESSION + add constraint ECL3EFK foreign key (EXPERIMENT_FK) references INVESTIGATION (ID); +alter table OTHER_EXPERIMENT_COEXPRESSION + add index ECL4EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK); +alter table OTHER_EXPERIMENT_COEXPRESSION + add constraint ECL4EFK foreign key (EXPERIMENT_FK) references INVESTIGATION (ID); -- denormalized table joining genes and compositeSequences; maintained by TableMaintenanceUtil. 
create table GENE2CS @@ -136,7 +88,7 @@ create table EXPRESSION_EXPERIMENT2CHARACTERISTIC DESCRIPTION text, CATEGORY varchar(255), CATEGORY_URI varchar(255), - VALUE varchar(255), + `VALUE` varchar(255), VALUE_URI varchar(255), ORIGINAL_VALUE varchar(255), EVIDENCE_CODE varchar(255), @@ -146,16 +98,15 @@ create table EXPRESSION_EXPERIMENT2CHARACTERISTIC primary key (ID, EXPRESSION_EXPERIMENT_FK) ); --- note: constraint names cannot exceed 64 characters, so we cannot use the usual naming convention --- no URI exceeds 100 characters in practice, so we only index a prefix alter table EXPRESSION_EXPERIMENT2CHARACTERISTIC - add constraint EE2C_CHARACTERISTIC_FKC foreign key (ID) references CHARACTERISTIC (ID) on update cascade on delete cascade, - add constraint EE2C_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade, - add index EE2C_VALUE (VALUE), - add index EE2C_CATEGORY (CATEGORY), - add index EE2C_VALUE_URI_VALUE (VALUE_URI(100), VALUE), - add index EE2C_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE (CATEGORY_URI(100), CATEGORY, VALUE_URI(100), VALUE), - add index EE2C_LEVEL (LEVEL); + add constraint EE2C_CHARACTERISTIC_FKC foreign key (ID) references CHARACTERISTIC (ID) on update cascade on delete cascade; +alter table EXPRESSION_EXPERIMENT2CHARACTERISTIC + add constraint EE2C_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade; + +-- note: constraint names cannot exceed 64 characters, so we cannot use the usual naming convention +create index EE2C_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (`VALUE`); +create index EE2C_CATEGORY on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY); +create index EE2C_LEVEL on EXPRESSION_EXPERIMENT2CHARACTERISTIC (LEVEL); create table EXPRESSION_EXPERIMENT2ARRAY_DESIGN ( @@ -169,5 +120,6 @@ create table EXPRESSION_EXPERIMENT2ARRAY_DESIGN ); alter table 
EXPRESSION_EXPERIMENT2ARRAY_DESIGN - add constraint EE2AD_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade, - add constraint EE2AD_ARRAY_DESIGN_FKC foreign key (ARRAY_DESIGN_FK) references ARRAY_DESIGN (ID) on update cascade on delete cascade; \ No newline at end of file + add constraint EE2AD_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade; +alter table EXPRESSION_EXPERIMENT2ARRAY_DESIGN + add constraint EE2AD_ARRAY_DESIGN_FKC foreign key (ARRAY_DESIGN_FK) references ARRAY_DESIGN (ID) on update cascade on delete cascade; diff --git a/gemma-core/src/main/resources/sql/init-indices.sql b/gemma-core/src/main/resources/sql/init-indices.sql deleted file mode 100644 index 17bf61ec80..0000000000 --- a/gemma-core/src/main/resources/sql/init-indices.sql +++ /dev/null @@ -1,129 +0,0 @@ --- Add some indices that are not included in the generated gemma-ddl.sql. 
Some of these are very important to performance - -ALTER TABLE ACLSID - ADD INDEX class (class); - -ALTER TABLE CURATION_DETAILS - ADD INDEX TROUBLED_IX (TROUBLED); - -ALTER TABLE BIO_SEQUENCE - ADD INDEX name (NAME); -ALTER TABLE ALTERNATE_NAME - ADD INDEX name (NAME); -ALTER TABLE INVESTIGATION - ADD INDEX name (NAME), - ADD INDEX shortname (SHORT_NAME), - ADD INDEX class (class), - ADD INDEX INVESTIGATION_NUMBER_OF_SAMPLES (NUMBER_OF_SAMPLES), - ADD INDEX INVESTIGATION_NUMBER_OF_DATA_VECTORS (NUMBER_OF_DATA_VECTORS); -ALTER TABLE DATABASE_ENTRY - ADD INDEX acc_ex (ACCESSION, EXTERNAL_DATABASE_FK); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX symbol_tax (OFFICIAL_SYMBOL, TAXON_FK); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX ncbigeneid (NCBI_GENE_ID); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX ncbigi (NCBI_GI); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX previous_ncbiid (PREVIOUS_NCBI_ID); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX ensemblid (ENSEMBL_ID); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX name (NAME); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX class (class); -ALTER TABLE GENE_ALIAS - ADD INDEX `alias` (`ALIAS`); -ALTER TABLE COMPOSITE_SEQUENCE - ADD INDEX name (NAME); -ALTER TABLE PHYSICAL_LOCATION - ADD INDEX BIN_KEY (BIN); -ALTER TABLE AUDIT_EVENT_TYPE - ADD INDEX class (class); -ALTER TABLE ANALYSIS - ADD INDEX class (class), - ADD INDEX ANALYSIS_NUMBER_OF_ELEMENTS_ANALYZED (NUMBER_OF_ELEMENTS_ANALYZED); -ALTER TABLE ANALYSIS_RESULT_SET - ADD INDEX ANALYSIS_RESULT_SET_NUMBER_OF_GENES_TESTED (NUMBER_OF_GENES_TESTED), - ADD INDEX ANALYSIS_RESULT_SET_NUMBER_OF_PROBES_TESTED (NUMBER_OF_PROBES_TESTED); --- no URI exceeds 100 characters in practice, so we only index a prefix -ALTER TABLE CHARACTERISTIC - ADD INDEX class (class), - ADD INDEX CHARACTERISTIC_VALUE (VALUE), - ADD INDEX CHARACTERISTIC_CATEGORY (CATEGORY), - ADD INDEX CHARACTERISTIC_VALUE_URI_VALUE (VALUE_URI(100), VALUE), - ADD INDEX CHARACTERISTIC_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE 
(CATEGORY_URI(100), CATEGORY, VALUE_URI(100), VALUE), - ADD INDEX CHARACTERISTIC_EVIDENCE_CODE (EVIDENCE_CODE), - ADD INDEX CHARACTERISTIC_PREDICATE_URI_PREDICATE (PREDICATE_URI(100), PREDICATE), - ADD INDEX CHARACTERISTIC_OBJECT_URI_OBJECT (OBJECT_URI(100), OBJECT), - ADD INDEX CHARACTERISTIC_SECOND_PREDICATE_URI_SECOND_PREDICATE (SECOND_PREDICATE_URI(100), SECOND_PREDICATE), - ADD INDEX CHARACTERISTIC_SECOND_OBJECT_URI_SECOND_OBJECT (SECOND_OBJECT_URI(100), SECOND_OBJECT); -ALTER TABLE GENE_SET - ADD INDEX name (NAME); -ALTER TABLE PROCESSED_EXPRESSION_DATA_VECTOR - ADD INDEX experimentProcessedVectorProbes (EXPRESSION_EXPERIMENT_FK, DESIGN_ELEMENT_FK); -ALTER TABLE DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT - ADD INDEX resultSetProbes (RESULT_SET_FK, PROBE_FK); -ALTER TABLE DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT - ADD INDEX probeResultSets (PROBE_FK, RESULT_SET_FK); -ALTER TABLE TAXON - ADD INDEX taxonncbiid (NCBI_ID); -ALTER TABLE TAXON - ADD INDEX taxonsecondncbiid (SECONDARY_NCBI_ID); -ALTER TABLE TAXON - ADD INDEX taxoncommonname (COMMON_NAME); -ALTER TABLE TAXON - ADD INDEX taxonscientificname (SCIENTIFIC_NAME); -ALTER TABLE CONTACT - ADD INDEX fullname (NAME, LAST_NAME); - --- should remove the FIRST_GENE_FK and SECOND_GENE_FK indices, but they get given 'random' names. --- Drop the second_gene_fk constraint. 
--- alter table HUMAN_GENE_COEXPRESSION drop foreign key FKF9E6557F21D58F19; --- alter table MOUSE_GENE_COEXPRESSION drop foreign key FKFC61C4F721D58F19; --- alter table RAT_GENE_COEXPRESSION drop foreign key FKDE59FC7721D58F19; --- alter table OTHER_GENE_COEXPRESSION drop foreign key FK74B9A3E221D58F19; - -ALTER TABLE HUMAN_GENE_COEXPRESSION - ADD INDEX hfgsg (FIRST_GENE_FK, SECOND_GENE_FK); -ALTER TABLE MOUSE_GENE_COEXPRESSION - ADD INDEX mfgsg (FIRST_GENE_FK, SECOND_GENE_FK); -ALTER TABLE RAT_GENE_COEXPRESSION - ADD INDEX rfgsg (FIRST_GENE_FK, SECOND_GENE_FK); -ALTER TABLE OTHER_GENE_COEXPRESSION - ADD INDEX ofgsg (FIRST_GENE_FK, SECOND_GENE_FK); - --- same for these, should drop the key for EXPERIMENT_FK, manually -ALTER TABLE HUMAN_EXPERIMENT_COEXPRESSION - ADD INDEX ECL1EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK), - ADD CONSTRAINT ECL1EFK FOREIGN KEY (EXPERIMENT_FK) REFERENCES INVESTIGATION (ID); -ALTER TABLE MOUSE_EXPERIMENT_COEXPRESSION - ADD INDEX ECL2EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK), - ADD CONSTRAINT ECL2EFK FOREIGN KEY (EXPERIMENT_FK) REFERENCES INVESTIGATION (ID); -ALTER TABLE RAT_EXPERIMENT_COEXPRESSION - ADD INDEX ECL3EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK), - ADD CONSTRAINT ECL3EFK FOREIGN KEY (EXPERIMENT_FK) REFERENCES INVESTIGATION (ID); -ALTER TABLE OTHER_EXPERIMENT_COEXPRESSION - ADD INDEX ECL4EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK), - ADD CONSTRAINT ECL4EFK FOREIGN KEY (EXPERIMENT_FK) REFERENCES INVESTIGATION (ID); - --- candidates for removal -ALTER TABLE DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT - ADD INDEX corrpvalbin (CORRECTED_P_VALUE_BIN); -ALTER TABLE HIT_LIST_SIZE - ADD INDEX direction (DIRECTION); - -ALTER TABLE MEASUREMENT - ADD INDEX MEASUREMENT_KIND_CV (KIND_C_V), - ADD INDEX MEASUREMENT_OTHER_KIND (OTHER_KIND), - ADD INDEX MEASUREMENT_REPRESENTATION (REPRESENTATION), - ADD INDEX MEASUREMENT_TYPE (TYPE), - ADD INDEX MEASUREMENT_VALUE (VALUE); - -ALTER TABLE GEEQ - ADD INDEX GEEQ_DETECTED_QUALITY_SCORE (DETECTED_QUALITY_SCORE), - ADD 
INDEX GEEQ_DETECTED_SUITABILITY_SCORE (DETECTED_SUITABILITY_SCORE), - ADD INDEX GEEQ_MANUAL_QUALITY_SCORE (MANUAL_QUALITY_SCORE), - ADD INDEX GEEQ_MANUAL_QUALITY_OVERRIDE (MANUAL_QUALITY_OVERRIDE), - ADD INDEX GEEQ_MANUAL_SUITABILITY_SCORE (MANUAL_SUITABILITY_SCORE), - ADD INDEX GEEQ_MANUAL_SUITABILITY_OVERRIDE (MANUAL_SUITABILITY_OVERRIDE); \ No newline at end of file diff --git a/gemma-core/src/main/resources/sql/migrations/db.1.31.3.sql b/gemma-core/src/main/resources/sql/migrations/db.1.31.3.sql new file mode 100644 index 0000000000..6c0545c40d --- /dev/null +++ b/gemma-core/src/main/resources/sql/migrations/db.1.31.3.sql @@ -0,0 +1,2 @@ +create index AUDIT_EVENT_DATE on AUDIT_EVENT (DATE); +create index AUDIT_EVENT_ACTION on AUDIT_EVENT (ACTION) \ No newline at end of file diff --git a/gemma-core/src/main/resources/sql/mysql/init-entities.sql b/gemma-core/src/main/resources/sql/mysql/init-entities.sql new file mode 100644 index 0000000000..83e0bc036f --- /dev/null +++ b/gemma-core/src/main/resources/sql/mysql/init-entities.sql @@ -0,0 +1,16 @@ +-- no URI exceeds 100 characters in practice, so we only index a prefix +alter table CHARACTERISTIC + add index CHARACTERISTIC_VALUE_URI_VALUE (VALUE_URI(100), `VALUE`); +alter table CHARACTERISTIC + add index CHARACTERISTIC_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE (CATEGORY_URI(100), CATEGORY, VALUE_URI(100), `VALUE`); +alter table CHARACTERISTIC + add index CHARACTERISTIC_PREDICATE_URI_PREDICATE (PREDICATE_URI(100), PREDICATE); +alter table CHARACTERISTIC + add index CHARACTERISTIC_OBJECT_URI_OBJECT (OBJECT_URI(100), OBJECT); +alter table CHARACTERISTIC + add index CHARACTERISTIC_SECOND_PREDICATE_URI_SECOND_PREDICATE (SECOND_PREDICATE_URI(100), SECOND_PREDICATE); +alter table CHARACTERISTIC + add index CHARACTERISTIC_SECOND_OBJECT_URI_SECOND_OBJECT (SECOND_OBJECT_URI(100), SECOND_OBJECT); + +create index EE2C_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (VALUE_URI(100), `VALUE`); +create index 
EE2C_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY_URI(100), CATEGORY, VALUE_URI(100), `VALUE`); diff --git a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml index 049453c23b..5c50319099 100644 --- a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml +++ b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml @@ -1,44 +1,46 @@ + http://www.springframework.org/schema/beans/spring-beans-3.2.xsd http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd"> + + + true + + America/Vancouver + + sql_mode='STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' + - + + - - - ${gemma.db.timezone} - sql_mode='${gemma.db.sqlMode}' - - + - + - - - ${gemma.testdb.timezone} - sql_mode='${gemma.testdb.sqlMode}' - - + + + diff --git a/gemma-core/src/main/resources/ubic/gemma/applicationContext-serviceBeans.xml b/gemma-core/src/main/resources/ubic/gemma/applicationContext-serviceBeans.xml index 489761263f..86d2c049f4 100644 --- a/gemma-core/src/main/resources/ubic/gemma/applicationContext-serviceBeans.xml +++ b/gemma-core/src/main/resources/ubic/gemma/applicationContext-serviceBeans.xml @@ -70,6 +70,10 @@ + + + + diff --git a/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt b/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt index 90124e1ba3..b1e8ecd68c 100644 --- a/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt +++ b/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt @@ -127,7 +127,6 @@ Bone marrow sample bone marrow http://purl.obolibrary.org/obo/UBERON_0002371 org bone marrow-derived macrophages bone marrow 
macrophage http://purl.obolibrary.org/obo/CL_0002476 cell type http://www.ebi.ac.uk/efo/EFO_0000324 Borrelia burgdorferi Borreliella burgdorferi http://purl.obolibrary.org/obo/NCBITaxon_139 treatment http://www.ebi.ac.uk/efo/EFO_0000727 brain brain http://purl.obolibrary.org/obo/UBERON_0000955 organism part http://www.ebi.ac.uk/efo/EFO_0000635 -brain  brain http://purl.obolibrary.org/obo/UBERON_0000955 organism part http://www.ebi.ac.uk/efo/EFO_0000635 Brain - Cerebellum cerebellum http://purl.obolibrary.org/obo/UBERON_0002037 organism part http://www.ebi.ac.uk/efo/EFO_0000635 Brain - Hippocampus Ammon's horn http://purl.obolibrary.org/obo/UBERON_0001954 organism part http://www.ebi.ac.uk/efo/EFO_0000635 brain (cortex) cerebral cortex http://purl.obolibrary.org/obo/UBERON_0000956 organism part http://www.ebi.ac.uk/efo/EFO_0000635 @@ -1271,8 +1270,8 @@ synovial membrane synovial membrane of synovial joint http://purl.obolibrary.org Synovial sarcoma tumor tissue synovial sarcoma http://purl.obolibrary.org/obo/MONDO_0010434 disease http://www.ebi.ac.uk/efo/EFO_0000408 T cell T cell http://purl.obolibrary.org/obo/CL_0000084 cell type http://www.ebi.ac.uk/efo/EFO_0000324 T cells T cell http://purl.obolibrary.org/obo/CL_0000084 cell type http://www.ebi.ac.uk/efo/EFO_0000324 -T-ALL T-cell adult acute lymphocytic leukemia http://purl.obolibrary.org/obo/MONDO_0003539 disease http://www.ebi.ac.uk/efo/EFO_0000408 -T-ALL diagnostic sample acute T cell leukemia http://purl.obolibrary.org/obo/MONDO_0003540 disease http://www.ebi.ac.uk/efo/EFO_0000408 +T-ALL T-cell acute lymphoblastic leukemia http://purl.obolibrary.org/obo/MONDO_0004963 disease http://www.ebi.ac.uk/efo/EFO_0000408 +T-ALL diagnostic sample T-cell acute lymphoblastic leukemia http://purl.obolibrary.org/obo/MONDO_0004963 disease http://www.ebi.ac.uk/efo/EFO_0000408 T-cell lymphoblasts T cell http://purl.obolibrary.org/obo/CL_0000084 cell type http://www.ebi.ac.uk/efo/EFO_0000324 T47D T-47D cell 
http://purl.obolibrary.org/obo/CLO_0009251 cell line http://purl.obolibrary.org/obo/CLO_0000031 tamoxifen tamoxifen http://purl.obolibrary.org/obo/CHEBI_41774 treatment http://www.ebi.ac.uk/efo/EFO_0000727 diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml index d8b840107e..19cca0175d 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml @@ -56,7 +56,8 @@ - + + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> - + - + @@ -42,7 +44,7 @@ + class="ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult"/> + sql-type="VARCHAR(255)" index="INVESTIGATION_NAME"/> + sql-type="VARCHAR(255)" index="shortname"/> - + - + + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/diff/HitListSize.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/diff/HitListSize.hbm.xml index 2893a21758..e1717bbc24 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/diff/HitListSize.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/diff/HitListSize.hbm.xml @@ -18,7 +18,7 @@ - + ubic.gemma.model.analysis.expression.diff.Direction true diff --git a/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/AuditEvent.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/AuditEvent.hbm.xml index c2599a41e4..78d68beac9 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/AuditEvent.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/AuditEvent.hbm.xml @@ -10,11 +10,11 @@ - + + sql-type="VARCHAR(255)" index="AUDIT_EVENT_ACTION"/> ubic.gemma.model.common.auditAndSecurity.AuditAction 
true diff --git a/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/curation/CurationDetails.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/curation/CurationDetails.hbm.xml index cfa266f2ac..c5e1bd2317 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/curation/CurationDetails.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/curation/CurationDetails.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> @@ -29,7 +29,7 @@ - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/common/description/Characteristic.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/common/description/Characteristic.hbm.xml index ca8e1c6548..f4e2107808 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/common/description/Characteristic.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/common/description/Characteristic.hbm.xml @@ -23,11 +23,11 @@ + sql-type="VARCHAR(255)" index="CHARACTERISTIC_VALUE"/> + sql-type="VARCHAR(255)" index="CHARACTERISTIC_CATEGORY"/> + sql-type="VARCHAR(255)" index="CHARACTERISTIC_EVIDENCE_CODE"/> ubic.gemma.model.association.GOEvidenceCode true diff --git a/gemma-core/src/main/resources/ubic/gemma/model/common/measurement/Measurement.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/common/measurement/Measurement.hbm.xml index 011b3d3798..7be9c3c99d 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/common/measurement/Measurement.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/common/measurement/Measurement.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> @@ -12,27 +12,30 @@ - + ubic.gemma.model.common.measurement.MeasurementType true - + - + ubic.gemma.model.common.measurement.MeasurementKind true - + - + ubic.gemma.model.common.quantitationtype.PrimitiveType true diff --git 
a/gemma-core/src/main/resources/ubic/gemma/model/expression/arrayDesign/AlternateName.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/arrayDesign/AlternateName.hbm.xml index ef57c9dece..80a3814b97 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/expression/arrayDesign/AlternateName.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/arrayDesign/AlternateName.hbm.xml @@ -12,7 +12,7 @@ - + \ No newline at end of file diff --git a/gemma-core/src/main/resources/ubic/gemma/model/expression/designElement/CompositeSequence.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/designElement/CompositeSequence.hbm.xml index c20a381b5f..b58d7060a3 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/expression/designElement/CompositeSequence.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/designElement/CompositeSequence.hbm.xml @@ -12,7 +12,7 @@ - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/expression/experiment/Geeq.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/experiment/Geeq.hbm.xml index 15d790a5bd..0a25744c9d 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/expression/experiment/Geeq.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/experiment/Geeq.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> @@ -12,22 +12,28 @@ - + - + - + - + - + - + @@ -39,7 +45,8 @@ - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/ChromosomeFeature.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/ChromosomeFeature.hbm.xml index 9d59919c3c..1220598ad3 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/ChromosomeFeature.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/ChromosomeFeature.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> + sql-type="VARCHAR(255)" index="CHROMOSOME_FEATURE_NAME"/> 
+ sql-type="VARCHAR(255)" index="PREVIOUS_NCBI_ID"/> @@ -33,7 +33,7 @@ abstract="false"> + sql-type="VARCHAR(255)" index="NCBI_GI"/> @@ -61,11 +61,11 @@ sql-type="text"/> - + + sql-type="VARCHAR(255)" index="ENSEMBL_ID"/> diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/PhysicalLocation.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/PhysicalLocation.hbm.xml index a9b2d33717..6c16fb932d 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/PhysicalLocation.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/PhysicalLocation.hbm.xml @@ -25,7 +25,7 @@ sql-type="VARCHAR(255)"/> - + \ No newline at end of file diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/Taxon.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/Taxon.hbm.xml index c8f4171f4b..ca8e6dedcb 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/Taxon.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/Taxon.hbm.xml @@ -4,31 +4,34 @@ "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/biosequence/BioSequence.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/biosequence/BioSequence.hbm.xml index 6bd0f0115f..8c101aa1c7 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/biosequence/BioSequence.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/biosequence/BioSequence.hbm.xml @@ -4,34 +4,34 @@ "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + @@ -49,25 +49,25 @@ - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneAlias.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneAlias.hbm.xml index a701b63304..18830b429e 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneAlias.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneAlias.hbm.xml @@ -12,7 +12,7 @@ - + \ No newline at end of file diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneSet.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneSet.hbm.xml index 6737693eb8..7ca18cfd54 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneSet.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneSet.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> @@ -17,7 +17,7 @@ + sql-type="VARCHAR(255)" index="GENE_SET_NAME"/> - - + + diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/BaseAnalyzerConfigurationTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/BaseAnalyzerConfigurationTest.java index 484c342053..05cfe53007 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/BaseAnalyzerConfigurationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/BaseAnalyzerConfigurationTest.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.RandomStringUtils; import org.junit.After; import org.junit.Before; +import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; import ubic.basecode.dataStructure.matrix.DoubleMatrix; import ubic.basecode.io.ByteArrayConverter; @@ -31,6 +32,7 @@ import ubic.gemma.core.analysis.service.ExpressionDataMatrixService; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.core.util.test.BaseSpringContextTest; +import ubic.gemma.core.util.test.category.SlowTest; import 
ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.arrayDesign.TechnologyType; diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/OneWayAnovaAnalyzerTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/OneWayAnovaAnalyzerTest.java index 4fc5680b1c..8590ee6655 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/OneWayAnovaAnalyzerTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/OneWayAnovaAnalyzerTest.java @@ -35,6 +35,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assume.assumeTrue; /** * Tests the one way anova analyzer. See test/data/stat-tests/README.txt for R code. @@ -52,10 +53,7 @@ public class OneWayAnovaAnalyzerTest extends BaseAnalyzerConfigurationTest { @Test public void testOneWayAnova() throws Exception { - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. Skipping test ...", connected ); super.configureTestDataForOneWayAnova(); @@ -86,10 +84,7 @@ public void testOneWayAnova() throws Exception { @Test public void testOnewayAnovaB() throws Exception { - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. 
Skipping test ...", connected ); super.configureTestDataForOneWayAnova(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TTestAnalyzerTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TTestAnalyzerTest.java index e9a52ddb06..c02086ec29 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TTestAnalyzerTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TTestAnalyzerTest.java @@ -20,7 +20,6 @@ import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; -import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; import ubic.gemma.model.analysis.expression.diff.*; import ubic.gemma.model.common.quantitationtype.ScaleType; import ubic.gemma.model.expression.biomaterial.BioMaterial; @@ -35,6 +34,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assume.assumeTrue; /** * See test/data/stat-tests/README.txt for R code. @@ -49,10 +49,7 @@ public class TTestAnalyzerTest extends BaseAnalyzerConfigurationTest { @Test public void testOneSampleTtest() throws Exception { - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. Skipping test ...", connected ); this.configureVectors( super.biomaterials, "/data/stat-tests/onesample-ttest-data.txt" ); @@ -127,10 +124,7 @@ public void testOneSampleTtest() throws Exception { @Test public void testTTestWithExpressionExperiment() { - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. 
Skipping test ...", connected ); this.configureMocks(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithInteractionsAnalyzerTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithInteractionsAnalyzerTest.java index cf0cc383d6..ab017fe9ce 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithInteractionsAnalyzerTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithInteractionsAnalyzerTest.java @@ -31,6 +31,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assume.assumeTrue; /** * Tests the two way anova analyzer with interactions. See test/data/stat-tests/README.txt for R code. @@ -47,10 +48,7 @@ public void testTwoWayAnova() { log.debug( "Testing TwoWayAnova method in " + DiffExAnalyzer.class.getName() ); - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. Skipping test ...", connected ); this.configureMocks(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithoutInteractionsAnalyzerTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithoutInteractionsAnalyzerTest.java index e71ecb2982..1a260706ee 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithoutInteractionsAnalyzerTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithoutInteractionsAnalyzerTest.java @@ -32,6 +32,7 @@ import java.util.List; import static org.junit.Assert.*; +import static org.junit.Assume.assumeTrue; /** * Tests the two way anova analyzer. See test/data/stat-tests/README.txt for R code. 
@@ -51,10 +52,7 @@ public void testTwoWayAnova() { log.debug( "Testing getPValues method in " + DiffExAnalyzer.class.getName() ); - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. Skipping test ...", connected ); this.configureMocks(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ExpressionDataSVDTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ExpressionDataSVDTest.java index 3b244fced9..99ecd07c47 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ExpressionDataSVDTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ExpressionDataSVDTest.java @@ -1,8 +1,8 @@ /* * The Gemma project - * + * * Copyright (c) 2008 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -86,7 +86,7 @@ public void testMatrixReconstruct() { * header=T, row.names=1) * testdata.s <- testdata * for(i in 1:5) { - * testdata.s <- t(scale(t(scale(testdata.s)))); + * testdata.s <- t(scale(t(scale(testdata.s)))); * } * s<-svd(testdata.s) * s$d @@ -132,7 +132,7 @@ public void testEigenvalues() throws SVDException { /* * See testEigenvalues - * + * *

      * cat( signif( p$sdev ˆ 2 / sum( p$sdev ˆ 2 ), 3 ), sep = ",\n" )
      * 
diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceTest.java index f663d63fd1..71b0caffb7 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceTest.java @@ -89,8 +89,8 @@ public void testComBatOnEE() throws Exception { assertNotNull( newee ); newee = expressionExperimentService.thawLite( newee ); processedExpressionDataVectorService.computeProcessedExpressionData( newee ); - try (InputStream deis = this.getClass() - .getResourceAsStream( "/data/loader/expression/geo/gse18162Short/design.txt" )) { + try ( InputStream deis = this.getClass() + .getResourceAsStream( "/data/loader/expression/geo/gse18162Short/design.txt" ) ) { experimentalDesignImporter.importDesign( newee, deis ); } ExpressionDataDoubleMatrix comBat = correctionService.comBat( newee ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java index 961a72c616..f0382cdf36 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java @@ -19,15 +19,11 @@ package ubic.gemma.core.analysis.preprocess.batcheffects; -import java.util.Collection; -import java.util.Map; - import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; - import 
org.springframework.core.io.ClassPathResource; import ubic.basecode.util.FileTools; import ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest; @@ -44,6 +40,9 @@ import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.Settings; +import java.util.Collection; +import java.util.Map; + import static org.junit.Assert.*; /** @@ -169,6 +168,7 @@ public void testGSE14285OneBatch() throws Exception { * batch info. */ @Test + @Category(SlowTest.class) public void testGSE156689NoBatchinfo() throws Exception { geoService.setGeoDomainObjectGenerator( new GeoDomainObjectGeneratorLocal( FileTools.resourceToPath( "/data/analysis/preprocess/batcheffects/" ) ) ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImplTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImplTest.java index 0f9f03de0a..861fafa329 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImplTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImplTest.java @@ -37,6 +37,7 @@ /** * @author paul */ +@Category(SlowTest.class) public class SVDServiceImplTest extends AbstractGeoServiceTest { @Autowired diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceTest.java index e413e4edba..e2f73ec1f4 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceTest.java @@ -2,12 +2,10 @@ import org.junit.After; import org.junit.Test; -import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; import 
org.springframework.security.core.context.SecurityContext; import org.springframework.security.core.context.SecurityContextHolder; import ubic.gemma.core.util.test.BaseSpringContextTest; -import ubic.gemma.core.util.test.category.SlowTest; import ubic.gemma.model.expression.experiment.BatchEffectType; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; @@ -32,7 +30,6 @@ public void tearDown() { } @Test - @Category(SlowTest.class) public void testRecalculateBatchInfo() { ee = getTestPersistentBasicExpressionExperiment(); assertNull( ee.getBatchEffect() ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/ExpressionExperimentBibRefFinderTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/ExpressionExperimentBibRefFinderTest.java index d3800e70b0..5b1607cb6f 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/ExpressionExperimentBibRefFinderTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/ExpressionExperimentBibRefFinderTest.java @@ -1,8 +1,8 @@ /* * The Gemma project - * + * * Copyright (c) 2007 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -18,34 +18,26 @@ */ package ubic.gemma.core.loader.entrez.pubmed; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.junit.Test; import org.junit.experimental.categories.Category; -import ubic.gemma.core.util.test.category.SlowTest; +import ubic.gemma.core.util.test.category.GeoTest; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import javax.net.ssl.SSLException; -import java.io.IOException; -import java.net.UnknownHostException; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assume.assumeNoException; +import static org.junit.Assert.*; +import static ubic.gemma.core.util.test.Assumptions.assumeThatResourceIsAvailable; /** * @author pavlidis */ +@Category(GeoTest.class) public class ExpressionExperimentBibRefFinderTest { - private static final Log log = LogFactory.getLog( ExpressionExperimentBibRefFinderTest.class.getName() ); - @Test - @Category(SlowTest.class) public void testLocatePrimaryReference() throws Exception { + assumeThatResourceIsAvailable( "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi" ); ExpressionExperimentBibRefFinder finder = new ExpressionExperimentBibRefFinder(); ExpressionExperiment ee = ExpressionExperiment.Factory.newInstance(); DatabaseEntry de = DatabaseEntry.Factory.newInstance(); @@ -54,25 +46,15 @@ public void testLocatePrimaryReference() throws Exception { de.setAccession( "GSE3023" ); de.setExternalDatabase( ed ); ee.setAccession( de ); - try { - BibliographicReference bibref = null; - for ( int i = 0; i < 3; i++ ) { - bibref = finder.locatePrimaryReference( ee ); - if ( bibref != null ) - break; - Thread.sleep( 1000 ); - } - assertNotNull( bibref ); - assertEquals( 
"Differential gene expression in anatomical compartments of the human eye.", - bibref.getTitle() ); - } catch ( Exception e ) { - checkCause( e ); - } - + BibliographicReference bibref = finder.locatePrimaryReference( ee ); + assertNotNull( bibref ); + assertEquals( "Differential gene expression in anatomical compartments of the human eye.", + bibref.getTitle() ); } @Test public void testLocatePrimaryReferenceInvalidGSE() throws Exception { + assumeThatResourceIsAvailable( "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi" ); ExpressionExperimentBibRefFinder finder = new ExpressionExperimentBibRefFinder(); ExpressionExperiment ee = ExpressionExperiment.Factory.newInstance(); DatabaseEntry de = DatabaseEntry.Factory.newInstance(); @@ -81,34 +63,7 @@ public void testLocatePrimaryReferenceInvalidGSE() throws Exception { de.setAccession( "GSE30231111111111111" ); de.setExternalDatabase( ed ); ee.setAccession( de ); - try { - BibliographicReference bibref = finder.locatePrimaryReference( ee ); - assert ( bibref == null ); - } catch ( Exception e ) { - checkCause( e ); - } - } - - private void checkCause( Exception e ) throws Exception { - IOException k; - if ( e instanceof IOException ) { - k = ( IOException ) e; - } else if ( e.getCause() instanceof IOException ) { - k = ( IOException ) e.getCause(); - } else { - throw e; - } - if ( k instanceof UnknownHostException || k instanceof SSLException ) { - assumeNoException( e ); - } else if ( k.getMessage().contains( "503" ) ) { - assumeNoException( "Test skipped due to a 503 error from NCBI", e ); - } else if ( k.getMessage().contains( "502" ) ) { - log.warn( "Test skipped due to a 502 error from NCBI" ); - } else if ( k.getMessage().contains( "500" ) ) { - log.warn( "Test skipped due to a 500 error from NCBI" ); - } else { - throw e; - } + BibliographicReference bibref = finder.locatePrimaryReference( ee ); + assertTrue( bibref == null ); } - } diff --git 
a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLFetcherTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLFetcherTest.java index f4ff4477d1..9a78ba3b48 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLFetcherTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLFetcherTest.java @@ -61,7 +61,6 @@ public final void testRetrieveByHTTP() { } @Test - @Category(SlowTest.class) public final void testRetrieveByHTTP2() { try { BibliographicReference br = pmf.retrieveByHTTP( 24850731 ); @@ -83,14 +82,13 @@ public final void testRetrieveByHTTP2() { * 23865096 is a NCBI bookshelf article, not a paper */ @Test - @Category({ SlowTest.class, PubMedTest.class }) public final void testRetrieveByHTTPBookshelf() { try { BibliographicReference br = pmf.retrieveByHTTP( 23865096 ); assertNotNull( br ); - assertEquals( "Tatton-Brown, Katrina; Rahman, Nazneen", br.getAuthorList() ); + assertEquals( "Ocansey, Sharon; Tatton-Brown, Katrina", br.getAuthorList() ); assertEquals( "GeneReviews", br.getPublication().substring( 0, "GeneReviews".length() ) ); assertEquals( "EZH2-Related Overgrowth", br.getTitle() ); @@ -103,7 +101,6 @@ public final void testRetrieveByHTTPBookshelf() { } @Test - @Category(SlowTest.class) public final void testRetrieveByHTTPNotFound() { try { BibliographicReference br = pmf.retrieveByHTTP( 1517311444 ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLParserTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLParserTest.java index 37ee30a997..ebc71cbf39 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLParserTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLParserTest.java @@ -45,7 +45,6 @@ /** * @author pavlidis */ -@Category(SlowTest.class) public class PubMedXMLParserTest { private static final Log log = 
LogFactory.getLog( PubMedXMLParserTest.class.getName() ); @@ -175,6 +174,7 @@ public void testParseMesh() throws Exception { * This uses a medline-format file, instead of the pubmed xml files we get from the eutils. */ @Test + @Category(SlowTest.class) public void testParseMulti() throws Exception { try { testStream = new GZIPInputStream( new ClassPathResource( "/data/loader/medline.multi.xml.gz" ).getInputStream() ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java index 00bb31c100..ebd02b2597 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.StringUtils; import org.junit.Assume; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; import ubic.basecode.util.FileTools; @@ -121,6 +122,7 @@ public void testAssignSequencesToDesignElementsMissingSequence() throws Exceptio } @Test + @Ignore("See https://github.com/PavlidisLab/Gemma/issues/1082 for details") public void testFetchAndLoadWithIdentifiers() throws Exception { String fastacmdExe = Settings.getString( SimpleFastaCmd.FASTA_CMD_ENV_VAR ); Assume.assumeTrue( "No fastacmd executable is configured, skipping test.", fastacmdExe != null ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java index 0b3ae5a0ba..b9959c3105 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java +++ 
b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java @@ -150,7 +150,6 @@ public void testConvertGSE106() throws Exception { * */ @Test - @Category(SlowTest.class) public void testConvertGSE18Stress() throws Exception { InputStream is = new GZIPInputStream( new ClassPathResource( "/data/loader/expression/geo/gse18short/GSE18.soft.gz" ).getInputStream() ); @@ -410,7 +409,6 @@ public void testConvertGse59() throws Exception { */ @SuppressWarnings("unchecked") @Test - @Category(SlowTest.class) public void testConvertGSE60() throws Exception { InputStream is = new GZIPInputStream( new ClassPathResource( "/data/loader/expression/geo/gse60Short/GSE60_family.soft.gz" ).getInputStream() ); @@ -624,7 +622,6 @@ public final void testGSE44903() throws Exception { */ @SuppressWarnings("unchecked") @Test - @Category(SlowTest.class) public final void testGSE8872() throws Exception { InputStream is = new GZIPInputStream( new ClassPathResource( "/data/loader/expression/geo/gse8872short/GSE8872_family.soft.gz" ).getInputStream() ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java index ec68bed950..72d5a5c8d2 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java @@ -1,5 +1,6 @@ package ubic.gemma.core.ontology; +import org.junit.After; import org.junit.Before; import org.junit.Test; import org.springframework.cache.concurrent.ConcurrentMapCache; @@ -24,13 +25,18 @@ public class OntologyCacheTest { @Before public void setUp() { ontologyService = mock( OntologyService.class ); - ontologyCache = new OntologyCache( new ConcurrentMapCache( "parents" ), new ConcurrentMapCache( "children" ) ); + ontologyCache = new OntologyCache( new ConcurrentMapCache( "search" ), new ConcurrentMapCache( "parents" ), new ConcurrentMapCache( "children" ) 
); term1 = new OntologyTermSimple( "http://example.com/term1", "term1" ); term2 = new OntologyTermSimple( "http://example.com/term2", "term2" ); term3 = new OntologyTermSimple( "http://example.com/term3", "term3" ); term4 = new OntologyTermSimple( "http://example.com/term3", "term4" ); } + @After + public void resetMocks() { + reset( ontologyService ); + } + @Test public void testLookupByMaximalSubset() { ontologyCache.getChildren( ontologyService, Collections.singleton( term1 ), true, true ); @@ -54,12 +60,14 @@ public void testLookupByMaximalSubset() { } @Test - public void testLookupByEnumeration() { + public void testLookupByMaximalSubsetWhenMinSubsetSizeIsSet() { ontologyCache.getChildren( ontologyService, Collections.singleton( term1 ), true, true ); verify( ontologyService ).getChildren( Collections.singleton( term1 ), true, true ); - // a k-3 subset exist (i.e. [term1]) but only via enumeration + ontologyCache.setMinSubsetSize( 2 ); + + // a subset of size 1 exists, but it cannot be used ontologyCache.getChildren( ontologyService, Arrays.asList( term1, term2, term3, term4 ), true, true ); - verify( ontologyService, atMostOnce() ).getChildren( Collections.singleton( term1 ), true, true ); + verify( ontologyService ).getChildren( new HashSet<>( Arrays.asList( term1, term2, term3, term4 ) ), true, true ); } } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyLoadingTest.java b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyLoadingTest.java index 595e112856..1814a302bb 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyLoadingTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyLoadingTest.java @@ -70,6 +70,12 @@ public class OntologyLoadingTest extends AbstractJUnit4SpringContextTests { @Autowired private UberonOntologyService uberon; + @Autowired + private ObiService obi; + + @Autowired + private MouseDevelopmentOntologyService mdo; + @Test public void 
testThatChebiDoesNotHaveInferenceEnabled() { assertThat( chebi.getInferenceMode() ).isEqualTo( OntologyService.InferenceMode.NONE ); @@ -84,7 +90,7 @@ public void testThatTGEMODoesNotProcessImports() { @Category(SlowTest.class) public void testInitializeAllOntologies() { // these are notoriously slow, so we skip them - List ignoredOntologies = Arrays.asList( efo, chebi, mp, mondo, clo, cl, hpo, uberon ); + List ignoredOntologies = Arrays.asList( efo, chebi, mp, mondo, clo, cl, hpo, uberon, obi, mdo ); List services = new ArrayList<>(); List> futures = new ArrayList<>(); for ( OntologyService os : ontologyServices ) { diff --git a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyServiceTest.java index 50bb012d0a..0979f725a2 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyServiceTest.java @@ -6,6 +6,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.cache.CacheManager; +import org.springframework.cache.concurrent.ConcurrentMapCacheManager; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.core.task.AsyncTaskExecutor; @@ -108,7 +109,7 @@ public TaskExecutor ontologyTaskExecutor() { @Bean public CacheManager cacheManager() { - return mock(); + return new ConcurrentMapCacheManager(); } } @@ -157,10 +158,7 @@ public void testTermLackingLabelIsIgnored() { when( chebiOntologyService.getTerm( "http://test" ) ).thenReturn( new OntologyTermSimple( "http://test", null ) ); assertNull( ontologyService.getTerm( "http://test" ) ); - // this is covering the case when baseCode defaults to the local name or URI when a term does not have a label - when( chebiOntologyService.getTerm( "http://test" ) 
).thenReturn( new OntologyTermSimple( "http://test", "http://test" ) ); - assertNull( ontologyService.getTerm( "http://test" ) ); - + // provide the term from another ontology, but with a label this time when( obiService.isOntologyLoaded() ).thenReturn( true ); when( obiService.getTerm( "http://test" ) ).thenReturn( new OntologyTermSimple( "http://test", "this is a test term" ) ); assertNotNull( ontologyService.getTerm( "http://test" ) ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultSetTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultSetTest.java new file mode 100644 index 0000000000..496b0478e0 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultSetTest.java @@ -0,0 +1,84 @@ +package ubic.gemma.core.search; + +import org.assertj.core.data.Index; +import org.junit.Test; +import ubic.gemma.model.common.search.SearchSettings; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; + +import java.util.Collections; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static ubic.gemma.core.util.test.Maps.map; + +public class SearchResultSetTest { + + @Test + public void test() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ); + SearchResultSet results = new SearchResultSet<>( settings ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.5, null, "test" ) ) ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.6, null, "test" ) ) ); + assertThat( results ).hasSize( 1 ) + .extracting( SearchResult::getScore ).containsExactly( 0.6 ); + } + + @Test + public void testResultObjectIsRetainedWhenReplacingAResult() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ); + SearchResultSet results = new SearchResultSet<>( settings ); + assertTrue( results.add( 
SearchResult.from( ExpressionExperiment.class, new ExpressionExperiment() {{ + setId( 1L ); + }}, 0.5, null, "test" ) ) ); + // replaced by a better result without a result object + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.6, null, "test" ) ) ); + assertThat( results ).satisfiesExactly( sr -> { + assertThat( sr.getResultId() ).isEqualTo( 1L ); + assertThat( sr.getResultObject() ).isNotNull(); + assertThat( sr.getScore() ).isEqualTo( 0.6 ); + } ); + } + + @Test + public void testAddWhenMaxResultsIsReached() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ) + .withMaxResults( 3 ); + SearchResultSet results = new SearchResultSet<>( settings ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.5, null, "test" ) ) ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 2L, 0.6, null, "test" ) ) ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 3L, 0.5, null, "test" ) ) ); + // ignored + assertFalse( results.add( SearchResult.from( ExpressionExperiment.class, 4L, 0.6, null, "test" ) ) ); + assertThat( results ).hasSize( 3 ); + // this is allowed though as it replaces a previous result + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 3L, 0.6, null, "test" ) ) ); + assertThat( results ).hasSize( 3 ) + .extracting( SearchResult::getResultId ) + .containsExactlyInAnyOrder( 1L, 2L, 3L ); + } + + @Test + public void testMergingHighlightWhenReplacingAResult() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ); + SearchResultSet results = new SearchResultSet<>( settings ); + results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.5, Collections.singletonMap( "a", "a" ), "test" ) ); + results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.6, Collections.singletonMap( "b", "b" ), "test" ) ); + assertThat( results ).hasSize( 1 ) + .extracting( 
SearchResult::getHighlights ) + .satisfies( h -> { + assertThat( h ).containsEntry( "a", "a" ).containsEntry( "b", "b" ); + }, Index.atIndex( 0 ) ); + } + + @Test + public void testMergingHighlightWhenRetainingAnExistingResult() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ); + SearchResultSet results = new SearchResultSet<>( settings ); + results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.6, Collections.singletonMap( "a", "a" ), "test" ) ); + results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.5, map( "a", "b", "b", "b" ), "test" ) ); + assertThat( results ).hasSize( 1 ) + .extracting( SearchResult::getHighlights ) + .containsExactly( map( "a", "a", "b", "b" ) ); + } +} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultTest.java index e69b8b51b4..33b45d1136 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultTest.java @@ -4,8 +4,10 @@ import ubic.gemma.model.common.Identifiable; import javax.annotation.Nullable; +import java.util.Collections; import static org.assertj.core.api.Assertions.assertThat; +import static ubic.gemma.core.util.test.Maps.map; /** * Tests for {@link SearchResult}. 
@@ -30,31 +32,32 @@ public Long getId() { @Test public void testResultObject() { - SearchResult sr = SearchResult.from( FooBar.class, new FooBar( 1L ), 1.0, "test object" ); + SearchResult sr = SearchResult.from( FooBar.class, new FooBar( 1L ), 1.0, Collections.singletonMap( "a", "b" ), "test object" ); assertThat( sr.getScore() ).isEqualTo( 1.0 ); - assertThat( sr.getHighlights() ).isNull(); + assertThat( sr.getHighlights() ).isEqualTo( map( "a", "b" ) ); + assertThat( sr ).hasToString( String.format( "FooBar Id=1 Score=%.2f Highlights=a Source=test object [Not Filled]", 1.0 ) ); } @Test(expected = IllegalArgumentException.class) public void testResultObjectWithNullId() { - SearchResult.from( FooBar.class, new FooBar( null ), 1.0, "test object" ); + SearchResult.from( FooBar.class, new FooBar( null ), 1.0, null, "test object" ); } @Test public void testSetResultObject() { - SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, "test object" ); + SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, null, "test object" ); sr.setResultObject( new FooBar( 1L ) ); } @Test(expected = IllegalArgumentException.class) public void testSetResultObjectWithNullId() { - SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, "test object" ); + SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, null, "test object" ); sr.setResultObject( new FooBar( null ) ); } @Test(expected = IllegalArgumentException.class) public void testSetResultObjectWithDifferentId() { - SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, "test object" ); + SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, null, "test object" ); sr.setResultObject( new FooBar( 2L ) ); } diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceIntegrationTest.java index a09eddeac2..7ff836de6a 100644 --- 
a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceIntegrationTest.java @@ -129,7 +129,7 @@ public void setUp() throws Exception { gene.setNcbiGeneId( new Integer( geneNcbiId ) ); geneService.update( gene ); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); } @After diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java index 76f0a06531..19414bdafc 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java @@ -7,7 +7,10 @@ import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.security.test.context.support.WithMockUser; +import org.springframework.security.test.context.support.WithSecurityContextTestExecutionListener; import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestExecutionListeners; import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.source.OntologySearchSource; @@ -21,10 +24,12 @@ import java.util.Collections; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertNull; import static org.mockito.Mockito.*; @ContextConfiguration +@TestExecutionListeners(WithSecurityContextTestExecutionListener.class) public class SearchServiceTest extends AbstractJUnit4SpringContextTests { private static final Taxon rat = Taxon.Factory.newInstance( "Rattus norvegicus", "rat", 192, false ); @@ -47,6 +52,11 @@ public 
TaxonService taxonService() { when( ts.loadAll() ).thenReturn( Collections.singletonList( rat ) ); return ts; } + + @Bean + public SearchSource fieldAwareSearchSource() { + return mock( FieldAwareSearchSource.class ); + } } @Autowired @@ -56,6 +66,10 @@ public TaxonService taxonService() { @Qualifier("databaseSearchSource") private SearchSource databaseSearchSource; + @Autowired + @Qualifier("fieldAwareSearchSource") + private SearchSource fieldAwareSearchSource; + @Autowired private OntologyService ontologyService; @@ -67,13 +81,24 @@ public void tearDown() { reset( databaseSearchSource, ontologyService ); } + @Test + public void testGetFields() { + when( ( ( FieldAwareSearchSource ) fieldAwareSearchSource ).getFields( ExpressionExperiment.class ) ) + .thenReturn( Collections.singleton( "shortName" ) ); + assertThat( searchService.getFields( ExpressionExperiment.class ) ) + .contains( "shortName" ); + verify( ( FieldAwareSearchSource ) fieldAwareSearchSource ).getFields( ExpressionExperiment.class ); + } + @Test public void test_whenTaxonIsNameIsUsedInQuery_thenAddTaxonToSearchSettings() throws SearchException { + when( databaseSearchSource.accepts( any() ) ).thenReturn( true ); SearchSettings settings = SearchSettings.builder() .resultType( Gene.class ) .query( "the best rat in the universe" ) .build(); searchService.search( settings ); + verify( databaseSearchSource ).accepts( settings.withTaxon( rat ) ); verify( databaseSearchSource ).searchGene( settings.withTaxon( rat ) ); } @@ -94,13 +119,14 @@ public void searchExpressionExperimentsByUri_whenQueryIsAUri_thenEnsureTheUriIsU .build(); searchService.search( settings ); verify( ontologyService ).getTerm( "http://purl.obolibrary.org/obo/DOID_14602" ); - verify( ontologyService ).findTerms( "http://purl.obolibrary.org/obo/DOID_14602" ); verifyNoMoreInteractions( ontologyService ); verify( characteristicService ).findExperimentsByUris( Collections.singleton( "http://purl.obolibrary.org/obo/DOID_14602" ), null, 10, 
true, false ); } @Test + @WithMockUser public void searchExpressionExperiment() throws SearchException { + when( databaseSearchSource.accepts( any() ) ).thenReturn( true ); SearchSettings settings = SearchSettings.builder() .query( "http://purl.obolibrary.org/obo/DOID_14602" ) .resultType( ExpressionExperiment.class ) @@ -111,6 +137,8 @@ public void searchExpressionExperiment() throws SearchException { .thenReturn( Collections.singletonMap( ExpressionExperiment.class, Collections.singletonMap( "test", Collections.singleton( ee ) ) ) ); SearchService.SearchResultMap results = searchService.search( settings ); + verify( databaseSearchSource ).accepts( settings ); + verify( databaseSearchSource ).searchExpressionExperiment( settings ); verify( characteristicService ).findExperimentsByUris( Collections.singleton( "http://purl.obolibrary.org/obo/DOID_14602" ), null, 5000, false, false ); assertNull( results.getByResultObjectType( ExpressionExperiment.class ).iterator().next().getResultObject() ); // since EE is a proxy, only its ID should be accessed diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java index d5c66f2e01..4a31b244f1 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java @@ -14,7 +14,6 @@ import ubic.gemma.core.genome.gene.service.GeneSetService; import ubic.gemma.model.IdentifiableValueObject; import ubic.gemma.model.analysis.expression.diff.ContrastResult; -import ubic.gemma.model.association.phenotype.PhenotypeAssociation; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.BibliographicReferenceValueObject; @@ -31,7 +30,6 @@ import ubic.gemma.model.genome.Taxon; import 
ubic.gemma.model.genome.gene.DatabaseBackedGeneSetValueObject; import ubic.gemma.model.genome.gene.GeneSet; -import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; import ubic.gemma.persistence.service.expression.experiment.BlacklistedEntityService; @@ -90,7 +88,6 @@ static class SearchServiceVoConversionTestContextConfiguration extends SearchSer private ExpressionExperiment ee; private ExpressionExperimentValueObject eevo; private GeneSet gs; - private CharacteristicValueObject phenotypeAssociation; @Before public void setUp() { @@ -106,7 +103,6 @@ public void setUp() { eevo.setId( 12L ); gs = new GeneSet(); gs.setId( 13L ); - phenotypeAssociation = new CharacteristicValueObject( 14L ); when( arrayDesignService.loadValueObject( any( ArrayDesign.class ) ) ).thenAnswer( a -> new ArrayDesignValueObject( a.getArgument( 0, ArrayDesign.class ) ) ); //noinspection unchecked when( arrayDesignService.loadValueObjects( anyCollection() ) ).thenAnswer( a -> ( ( Collection ) a.getArgument( 0, Collection.class ) ) @@ -129,14 +125,14 @@ public void tearDown() { @Test @WithMockUser public void testConvertArrayDesign() { - searchService.loadValueObject( SearchResult.from( ArrayDesign.class, ad, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( ArrayDesign.class, ad, 1.0, null, "test object" ) ); verify( arrayDesignService ).loadValueObject( ad ); } @Test @WithMockUser public void testConvertArrayDesignCollection() { - searchService.loadValueObjects( Collections.singleton( SearchResult.from( ArrayDesign.class, ad, 1.0, "test object" ) ) ); + searchService.loadValueObjects( Collections.singleton( SearchResult.from( ArrayDesign.class, ad, 1.0, null, "test object" ) ) ); verify( arrayDesignService ).loadValueObjects( Collections.singletonList( ad ) ); } @@ -146,14 +142,14 @@ 
public void testConvertBibliographicReference() { when( bibliographicReferenceService.loadValueObject( any( BibliographicReference.class ) ) ) .thenAnswer( arg -> new BibliographicReferenceValueObject( arg.getArgument( 0, BibliographicReference.class ) ) ); br.setId( 13L ); - searchService.loadValueObject( SearchResult.from( BibliographicReference.class, br, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( BibliographicReference.class, br, 1.0, null, "test object" ) ); verify( bibliographicReferenceService ).loadValueObject( br ); } @Test @WithMockUser public void testConvertCompositeSequence() { - searchService.loadValueObject( SearchResult.from( CompositeSequence.class, cs, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( CompositeSequence.class, cs, 1.0, null, "test object" ) ); verify( compositeSequenceService ).loadValueObject( cs ); } @@ -162,32 +158,21 @@ public void testConvertCompositeSequence() { public void testConvertCompositeSequenceCollection() { when( compositeSequenceService.loadValueObjects( any() ) ).thenReturn( Collections.singletonList( new CompositeSequenceValueObject( cs ) ) ); // this is a special case because of how it's implemented - searchService.loadValueObjects( Collections.singleton( SearchResult.from( CompositeSequence.class, cs, 1.0, "test object" ) ) ); + searchService.loadValueObjects( Collections.singleton( SearchResult.from( CompositeSequence.class, cs, 1.0, null, "test object" ) ) ); verify( compositeSequenceService ).loadValueObjects( Collections.singletonList( cs ) ); } @Test @WithMockUser public void testConvertExpressionExperiment() { - searchService.loadValueObject( SearchResult.from( ExpressionExperiment.class, ee, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( ExpressionExperiment.class, ee, 1.0, null, "test object" ) ); verify( expressionExperimentService ).loadValueObject( ee ); } - @Test - public void testConvertPhenotypeAssociation() { - 
// this is a complicated one because the result type does not match the entity - assertThat( searchService.loadValueObject( SearchResult.from( PhenotypeAssociation.class, phenotypeAssociation, 1.0, "test object" ) ) ) - .extracting( "resultObject" ) - .isSameAs( phenotypeAssociation ); - assertThat( searchService.loadValueObjects( Collections.singleton( SearchResult.from( PhenotypeAssociation.class, phenotypeAssociation, 1.0, "test object" ) ) ) ) - .extracting( "resultObject" ) - .containsExactly( phenotypeAssociation ); - } - @Test public void testConvertGeneSet() { // this is another complicated one because GeneSetService does not implement BaseVoEnabledService - searchService.loadValueObject( SearchResult.from( GeneSet.class, gs, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( GeneSet.class, gs, 1.0, null, "test object" ) ); verify( geneSetService ).loadValueObject( gs ); } @@ -195,7 +180,7 @@ public void testConvertGeneSet() { public void testConvertUninitializedResult() { DatabaseBackedGeneSetValueObject gsvo = new DatabaseBackedGeneSetValueObject( gs, new Taxon(), 1L ); when( geneSetService.loadValueObjectById( 13L ) ).thenReturn( gsvo ); - SearchResult> sr = searchService.loadValueObject( SearchResult.from( GeneSet.class, 13L, 1.0, "test object" ) ); + SearchResult> sr = searchService.loadValueObject( SearchResult.from( GeneSet.class, 13L, 1.0, null, "test object" ) ); assertThat( sr ) .isNotNull() .hasFieldOrPropertyWithValue( "resultType", GeneSet.class ) @@ -210,18 +195,18 @@ public void testConvertUninitializedResult() { public void testUnsupportedResultTypeRaisesIllegalArgumentException() { ContrastResult cr = new ContrastResult(); cr.setId( 1L ); - searchService.loadValueObject( SearchResult.from( ContrastResult.class, cr, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( ContrastResult.class, cr, 1.0, null, "test object" ) ); } @Test(expected = IllegalArgumentException.class) public void 
testUnsupportedResultTypeInCollectionRaisesIllegalArgumentException() { - searchService.loadValueObjects( Collections.singleton( SearchResult.from( ContrastResult.class, new ContrastResult(), 0.0f, "test object" ) ) ); + searchService.loadValueObjects( Collections.singleton( SearchResult.from( ContrastResult.class, new ContrastResult(), 0.0f, null, "test object" ) ) ); } @Test public void testConvertAlreadyConvertedCollection() { searchService.loadValueObjects( Collections.singletonList( - SearchResult.from( ExpressionExperiment.class, eevo, 0.0f, "test value object" ) ) ); + SearchResult.from( ExpressionExperiment.class, eevo, 0.0f, null, "test value object" ) ) ); verify( expressionExperimentService ).loadValueObjectsByIds( Collections.singletonList( eevo.getId() ) ); } @@ -239,9 +224,9 @@ public void testBlacklistedConversion() { when( expressionExperimentService.loadValueObjects( any() ) ).thenReturn( Collections.singletonList( new ExpressionExperimentValueObject( ee ) ) ); when( blacklistedEntityService.loadValueObjects( any() ) ).thenReturn( Arrays.asList( BlacklistedValueObject.fromEntity( bp ), BlacklistedValueObject.fromEntity( be ) ) ); List>> vos = searchService.loadValueObjects( Arrays.asList( - SearchResult.from( BlacklistedEntity.class, be, 0.0, "test blacklisted object" ), - SearchResult.from( BlacklistedEntity.class, bp, 0.0, "test blacklisted object" ), - SearchResult.from( ExpressionExperiment.class, ee, 1.0, "test object" ) ) ); + SearchResult.from( BlacklistedEntity.class, be, 0.0, null, "test blacklisted object" ), + SearchResult.from( BlacklistedEntity.class, bp, 0.0, null, "test blacklisted object" ), + SearchResult.from( ExpressionExperiment.class, ee, 1.0, null, "test object" ) ) ); verify( expressionExperimentService ).loadValueObjects( Collections.singletonList( ee ) ); assertThat( vos ) .extracting( "resultType", "resultId" ) diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java 
b/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java new file mode 100644 index 0000000000..7209a6d502 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java @@ -0,0 +1,144 @@ +package ubic.gemma.core.search.lucene; + +import org.junit.Test; +import ubic.gemma.core.search.SearchException; +import ubic.gemma.model.common.search.SearchSettings; + +import java.net.URI; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.util.Sets.set; +import static org.junit.Assert.*; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.*; + +public class LuceneQueryUtilsTest { + + @Test + public void testExtractTerms() throws SearchException { + assertThat( extractTerms( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND BRCA3) OR NOT BRCA4 OR -BRCA5", null ) ) ) + .containsExactlyInAnyOrder( "BRCA1", "BRCA2", "BRCA3" ); + // fielded terms are excluded + assertThat( extractTerms( SearchSettings.geneSearch( "shortName:GSE1234 test", null ) ) ) + .containsExactlyInAnyOrder( "test" ); + } + + @Test + public void testExtractDnf() throws SearchException { + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND BRCA3) OR NOT BRCA4 OR -BRCA5 OR (BRCA6 OR BRCA7)", null ) ) ) + .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2", "BRCA3" ), set( "BRCA6" ), set( "BRCA7" ) ); + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 AND BRCA2", null ) ) ) + .containsExactlyInAnyOrder( set( "BRCA1", "BRCA2" ) ); + assertThat( extractTermsDnf( SearchSettings.geneSearch( "NOT BRCA1 AND NOT BRCA2", null ) ) ) + .isEmpty(); + assertThat( extractTermsDnf( SearchSettings.geneSearch( "NOT BRCA1 OR NOT BRCA2", null ) ) ) + .isEmpty(); + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 AND NOT BRCA2", null ) ) ) + .containsExactly( set( "BRCA1" ) ); + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 OR NOT (BRCA2 AND 
BRCA3)", null ) ) ) + .containsExactly( set( "BRCA1" ) ); + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 AND (BRCA2 OR BRCA3)", null ) ) ) + .isEmpty(); + } + + @Test + public void testExtractDnfWithQuotedSpaces() throws SearchException { + assertThat( extractTermsDnf( SearchSettings.geneSearch( "\"alpha beta\" OR \"gamma delta\"", null ) ) ) + .containsExactlyInAnyOrder( set( "alpha beta" ), set( "gamma delta" ) ); + } + + @Test + public void testExtractDnfWithNestedOrInClause() throws SearchException { + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 OR (BRCA3 AND BRCA4))", null ) ) ) + .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2" ), set( "BRCA3", "BRCA4" ) ); + } + + @Test + public void testExtractDnfWithNestedAndInSubClause() throws SearchException { + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND (BRCA3 AND BRCA4))", null ) ) ) + .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2", "BRCA3", "BRCA4" ) ); + } + + @Test + public void testExtractDnfWithUris() throws SearchException { + // this is an important case for searching datasets by ontology terms + assertThat( extractTermsDnf( SearchSettings.geneSearch( "http://example.com/GO:1234 OR http://example.com/GO:1235", null ) ) ) + .contains( set( "http://example.com/GO:1234" ), set( "http://example.com/GO:1235" ) ); + } + + @Test + public void testPrepareDatabaseQuery() throws SearchException { + assertEquals( "BRCA1", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA1", null ) ) ); + assertEquals( "BRCA1", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA1^4", null ) ) ); + assertEquals( "BRCA1", prepareDatabaseQuery( SearchSettings.geneSearch( "\"BRCA1\"", null ) ) ); + assertEquals( "BRCA1", prepareDatabaseQuery( SearchSettings.geneSearch( "(BRCA1)", null ) ) ); + // fielded term are ignored + assertNull( prepareDatabaseQuery( SearchSettings.geneSearch( "symbol:BRCA1", null ) ) ); + assertEquals( "+BRCA", 
prepareDatabaseQuery( SearchSettings.geneSearch( "\\+BRCA", null ), true ) ); + assertEquals( "BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA OR TCGA", null ) ) ); + assertEquals( "BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA AND TCGA", null ) ) ); + assertEquals( "BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA AND NOT TCGA", null ) ) ); + assertEquals( "TCGA", prepareDatabaseQuery( SearchSettings.geneSearch( "NOT BRCA AND TCGA", null ) ) ); + assertEquals( "BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA -TCGA", null ) ) ); + assertEquals( "BRCA AND TCGA", prepareDatabaseQuery( SearchSettings.geneSearch( "\"BRCA AND TCGA\"", null ) ) ); + // wildcards and prefix queries are ignored for database queries + assertNull( prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA*", null ) ) ); + } + + @Test + public void testPrepareDatabaseQueryWithUri() throws SearchException { + // ideal case, using quotes + assertEquals( "http://example.com/GO:1234", prepareDatabaseQuery( SearchSettings.geneSearch( "\"http://example.com/GO:1234\"", null ) ) ); + assertEquals( "http://example.com/GO:1234", prepareDatabaseQuery( SearchSettings.geneSearch( "http://example.com/GO:1234", null ) ) ); + assertEquals( "http://example.com/GO:1234?a=b#c=d", prepareDatabaseQuery( SearchSettings.geneSearch( "http://example.com/GO:1234?a=b#c=d", null ) ) ); + assertEquals( "http://example.com/GO_1234", prepareDatabaseQuery( SearchSettings.geneSearch( "http://example.com/GO_1234", null ) ) ); + assertEquals( "http://example.com/#GO_1234", prepareDatabaseQuery( SearchSettings.geneSearch( "http://example.com/#GO_1234", null ) ) ); + assertEquals( "http://example.com/GO:1234", prepareDatabaseQuery( SearchSettings.geneSearch( "http://example.com/GO:1234 OR http://example.com/GO:1235", null ) ) ); + } + + @Test + public void testPrepareDatabaseQueryForInexactMatch() throws SearchException { + assertEquals( "BRCA", prepareDatabaseQuery( 
SearchSettings.geneSearch( "\"BRCA\"", null ), true ) ); + assertEquals( "br%ca", prepareDatabaseQuery( SearchSettings.geneSearch( "BR*CA", null ), true ) ); + assertEquals( "brca%", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA*", null ), true ) ); + assertEquals( "BRCA*", prepareDatabaseQuery( SearchSettings.geneSearch( "\"BRCA\\*\"", null ), true ) ); + assertEquals( "brca_", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA?", null ), true ) ); + assertEquals( "BRCA?", prepareDatabaseQuery( SearchSettings.geneSearch( "\"BRCA?\"", null ), true ) ); + assertEquals( "BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "+BRCA", null ), true ) ); + // escaped wildcard + assertEquals( "BRCA?", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA\\?", null ), true ) ); + assertEquals( "BRCA*", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA\\*", null ), true ) ); + // forbidden prefix-style searches + assertEquals( "*", prepareDatabaseQuery( SearchSettings.geneSearch( "*", null ), true ) ); + assertEquals( "*BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "*BRCA", null ), true ) ); + assertEquals( "?", prepareDatabaseQuery( SearchSettings.geneSearch( "?", null ), true ) ); + assertEquals( "?RCA", prepareDatabaseQuery( SearchSettings.geneSearch( "?RCA", null ), true ) ); + // check for escaping LIKE patterns + assertEquals( "BRCA\\\\", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA\\", null ), true ) ); + assertEquals( "BRCA\\%", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA\\%", null ), true ) ); + assertEquals( "BRCA\\%", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA%", null ), true ) ); + assertEquals( "BRCA\\_", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA_", null ), true ) ); + } + + @Test + public void testIsWildcard() { + assertFalse( isWildcard( SearchSettings.geneSearch( "*", null ) ) ); + assertFalse( isWildcard( SearchSettings.geneSearch( "*BRCA", null ) ) ); + assertTrue( isWildcard( 
SearchSettings.geneSearch( "BR*CA", null ) ) ); + assertTrue( isWildcard( SearchSettings.geneSearch( "BRCA*", null ) ) ); + assertFalse( isWildcard( SearchSettings.geneSearch( "BRCA1 BRCA*", null ) ) ); + assertFalse( isWildcard( SearchSettings.geneSearch( "\"BRCA*\"", null ) ) ); + assertTrue( isWildcard( SearchSettings.geneSearch( "BRCA?", null ) ) ); + assertFalse( isWildcard( SearchSettings.geneSearch( "BRCA\\*", null ) ) ); + assertFalse( isWildcard( SearchSettings.geneSearch( "\"BRCA1\" \"BRCA2\"", null ) ) ); + } + + @Test + public void testPrepareTermUriQuery() throws SearchException { + assertEquals( URI.create( "http://example.com" ), prepareTermUriQuery( SearchSettings.geneSearch( "http://example.com", null ) ) ); + assertEquals( URI.create( "http://example.com" ), prepareTermUriQuery( SearchSettings.geneSearch( "\"http://example.com\"", null ) ) ); + // an invalid URI + assertNull( prepareTermUriQuery( SearchSettings.geneSearch( "\"http://example.com /test\"", null ) ) ); + // an interesting case: a fielded search for a URI + assertEquals( URI.create( "http://example.com" ), prepareTermUriQuery( SearchSettings.geneSearch( "http:\"http://example.com\"", null ) ) ); + } +} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/source/DatabaseSearchSourceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/source/DatabaseSearchSourceTest.java index 2ec296328a..0f1169042c 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/source/DatabaseSearchSourceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/source/DatabaseSearchSourceTest.java @@ -14,7 +14,9 @@ import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchSource; import ubic.gemma.model.common.search.SearchSettings; +import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; +import 
ubic.gemma.persistence.service.expression.experiment.BlacklistedEntityService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService; import ubic.gemma.persistence.service.genome.biosequence.BioSequenceService; @@ -74,6 +76,16 @@ public GeneSetService geneSetService() { public ExpressionExperimentSetService experimentSetService() { return mock( ExpressionExperimentSetService.class ); } + + @Bean + public ArrayDesignService arrayDesignService() { + return mock( ArrayDesignService.class ); + } + + @Bean + public BlacklistedEntityService blacklistedEntityService() { + return mock(); + } } @Autowired @@ -98,18 +110,32 @@ public void test_whenQueryContainsQuote_thenStripThem() throws SearchException { @Test public void test_whenQueryContainsLikePatterns_thenEscape() throws SearchException { databaseSearchSource.searchGene( SearchSettings.geneSearch( "BRCA%", null ) ); + verify( geneService ).findByAccession( "BRCA%", null ); verify( geneService ).findByOfficialSymbolInexact( "BRCA\\%%" ); } @Test public void test_whenQueryContainsAsterisk_thenSubstituteForPercent() throws SearchException { databaseSearchSource.searchGene( SearchSettings.geneSearch( "BRCA?*", null ) ); - verify( geneService ).findByOfficialSymbolInexact( "BRCA_%" ); + verify( geneService ).findByOfficialSymbolInexact( "brca_%" ); } @Test public void test_quotedTerms() throws SearchException { - databaseSearchSource.searchGene( SearchSettings.geneSearch( "\"BRCA1\" \"BRCA2\"", null ) ); + databaseSearchSource.searchGene( SearchSettings.geneSearch( "\"BRCA1 BRCA2\"", null ) ); verify( geneService ).findByOfficialSymbol( "BRCA1 BRCA2" ); } + + @Test + public void testSearchGeneByUri() throws SearchException { + databaseSearchSource.searchGene( SearchSettings.geneSearch( "http://purl.org/commons/record/ncbi_gene/1234", null ) ); + verify( geneService ).findByNCBIId( 1234 ); + verify( 
geneService ).findByOfficialSymbol( "http://purl.org/commons/record/ncbi_gene/1234" ); + } + + @Test + public void testSearchGeneByUriInexact() throws SearchException { + databaseSearchSource.searchGene( SearchSettings.geneSearch( "http://purl.org/commons/record/ncbi_gene/123?", null ) ); + verify( geneService ).findByOfficialSymbolInexact( "http://purl.org/commons/record/ncbi\\_gene/123_" ); + } } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/source/HibernateSearchSourceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/source/HibernateSearchSourceTest.java index f87dfabd68..6ad7dd06b3 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/source/HibernateSearchSourceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/source/HibernateSearchSourceTest.java @@ -8,6 +8,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; import ubic.gemma.core.search.DefaultHighlighter; +import ubic.gemma.core.search.SearchException; import ubic.gemma.core.util.test.BaseDatabaseTest; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.search.SearchSettings; @@ -34,7 +35,7 @@ public HibernateSearchSource hibernateSearchSource() { private HibernateSearchSource hibernateSearchSource; @Test - public void test() throws HibernateSearchException { + public void test() throws SearchException { assertThat( hibernateSearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "hello" ) ) ) .isEmpty(); assertThat( hibernateSearchSource.searchArrayDesign( SearchSettings.expressionExperimentSearch( "hello" ) ) ) @@ -54,7 +55,7 @@ public void test() throws HibernateSearchException { } @Test - public void testSearchExpressionExperiment() throws HibernateSearchException { + public void testSearchExpressionExperiment() throws SearchException { FullTextSession fts = 
Search.getFullTextSession( sessionFactory.getCurrentSession() ); assertThat( hibernateSearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "hello" ) ) ) @@ -84,7 +85,7 @@ public void testSearchExpressionExperiment() throws HibernateSearchException { } @Test - public void testSearchExpressionExperimentByStatementObject() throws HibernateSearchException { + public void testSearchExpressionExperimentByStatementObject() throws SearchException { FullTextSession fts = Search.getFullTextSession( sessionFactory.getCurrentSession() ); Taxon taxon = new Taxon(); fts.persist( taxon ); @@ -123,4 +124,9 @@ public void testSearchExpressionExperimentByStatementObject() throws HibernateSe assertThat( r.getResultObject() ).isEqualTo( ee ); } ); } + + @Test + public void testSearchWithInvalidQuerySyntax() throws SearchException { + hibernateSearchSource.searchExpressionExperiment( SearchSettings.builder().query( "\"" ).build() ); + } } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java index ef44b30051..9447253cc2 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java @@ -1,8 +1,5 @@ package ubic.gemma.core.search.source; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.search.highlight.QueryScorer; import org.junit.After; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -14,7 +11,7 @@ import ubic.basecode.ontology.model.OntologyTermSimple; import ubic.basecode.ontology.search.OntologySearchException; import ubic.gemma.core.ontology.OntologyService; -import ubic.gemma.core.search.Highlighter; +import ubic.gemma.core.search.OntologyHighlighter; import 
ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchSource; @@ -24,10 +21,10 @@ import ubic.gemma.persistence.util.TestComponent; import javax.annotation.Nullable; +import java.net.URI; import java.util.Collection; import java.util.Collections; import java.util.Map; -import java.util.Set; import static junit.framework.TestCase.assertEquals; import static org.assertj.core.api.Assertions.assertThat; @@ -83,24 +80,19 @@ public void test() throws SearchException, OntologySearchException { .thenReturn( Collections.singletonMap( ExpressionExperiment.class, Collections.singletonMap( "http://purl.obolibrary.org/obo/CL_0000129", Collections.singleton( ee ) ) ) ); Collection> results = ontologySearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "http://purl.obolibrary.org/obo/CL_0000129" ) - .withHighlighter( new Highlighter() { + .withHighlighter( new OntologyHighlighter() { @Override - public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { - return Collections.singletonMap( field, termUri != null ? String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); - } - - @Nullable - @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return null; + public Map highlight( String value, String field ) { + return Collections.singletonMap( field, value ); } @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { - return Collections.emptyMap(); + public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { + return Collections.singletonMap( field, termUri != null ? 
String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); } } ) ); - verify( ontologyService ).findTerms( "http://purl.obolibrary.org/obo/CL_0000129" ); + verify( ontologyService ).getTerm( "http://purl.obolibrary.org/obo/CL_0000129" ); + verify( ontologyService ).getChildren( argThat( col -> col.size() == 1 ), eq( false ), eq( true ) ); verify( characteristicService ).findExperimentsByUris( Collections.singleton( "http://purl.obolibrary.org/obo/CL_0000129" ), null, 5000, true, false ); assertThat( results ).anySatisfy( result -> { assertThat( result ) @@ -119,24 +111,19 @@ public void testWhenTermIsNotFoundGenerateLabelFromUri() throws SearchException .thenReturn( Collections.singletonMap( ExpressionExperiment.class, Collections.singletonMap( "http://purl.obolibrary.org/obo/CL_0000129", Collections.singleton( ee ) ) ) ); Collection> results = ontologySearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "http://purl.obolibrary.org/obo/CL_0000129" ) - .withHighlighter( new Highlighter() { + .withHighlighter( new OntologyHighlighter() { @Override - public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { - return Collections.singletonMap( field, termUri != null ? String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); + public Map highlight( String value, String field ) { + return Collections.singletonMap( field, value ); } - @Nullable @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return null; - } - - @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { - return Collections.emptyMap(); + public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { + return Collections.singletonMap( field, termUri != null ? 
String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); } } ) ); - verify( ontologyService ).findTerms( "http://purl.obolibrary.org/obo/CL_0000129" ); + verify( ontologyService ).getTerm( "http://purl.obolibrary.org/obo/CL_0000129" ); + verifyNoMoreInteractions( ontologyService ); verify( characteristicService ).findBestByUri( "http://purl.obolibrary.org/obo/CL_0000129" ); verify( characteristicService ).findExperimentsByUris( Collections.singleton( "http://purl.obolibrary.org/obo/CL_0000129" ), null, 5000, true, false ); assertThat( results ).anySatisfy( result -> { @@ -148,15 +135,26 @@ public Map highlightDocument( Document document, org.apache.luce } ); } + @Test + public void testSearchExpressionExperimentWithBooleanQuery() throws SearchException { + ontologySearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "a OR (b AND c) OR http://example.com/d OR \"a quoted string containing an escaped quote \\\"\"" ) ); + verify( ontologyService ).findTerms( "a" ); + verify( ontologyService ).findTerms( "b" ); + verify( ontologyService ).findTerms( "c" ); + verify( ontologyService ).getTerm( "http://example.com/d" ); + verify( ontologyService ).findTerms( "\"a quoted string containing an escaped quote \\\"\"" ); + verifyNoMoreInteractions( ontologyService ); + } + @Test public void testGetLabelFromTermUri() { - assertEquals( "GO:0004016", getLabelFromTermUri( "http://purl.obolibrary.org/obo/GO_0004016" ) ); - assertEquals( "CHEBI:7466", getLabelFromTermUri( "http://purl.obolibrary.org/obo/chebi.owl#CHEBI_7466" ) ); - assertEquals( "BIRNLEX:15001", getLabelFromTermUri( "http://ontology.neuinfo.org/NIF/Function/NIF-Function.owl#birnlex_15001" ) ); - assertEquals( "GO:0004016", getLabelFromTermUri( "http://purl.obolibrary.org/obo//GO_0004016//" ) ); - assertEquals( "http://purl.obolibrary.org////", getLabelFromTermUri( "http://purl.obolibrary.org////" ) ); - assertEquals( "PAT:ID_20327", getLabelFromTermUri( 
"http://www.orphanet.org/rdfns#pat_id_20327" ) ); - assertEquals( "PAT:ID_20327", getLabelFromTermUri( "http://www.orphanet.org/rdfns#pat_id_20327" ) ); - assertEquals( "63857", getLabelFromTermUri( "http://purl.org/commons/record/ncbi_gene/63857" ) ); + assertEquals( "GO:0004016", getLabelFromTermUri( URI.create( "http://purl.obolibrary.org/obo/GO_0004016" ) ) ); + assertEquals( "CHEBI:7466", getLabelFromTermUri( URI.create( "http://purl.obolibrary.org/obo/chebi.owl#CHEBI_7466" ) ) ); + assertEquals( "BIRNLEX:15001", getLabelFromTermUri( URI.create( "http://ontology.neuinfo.org/NIF/Function/NIF-Function.owl#birnlex_15001" ) ) ); + assertEquals( "GO:0004016", getLabelFromTermUri( URI.create( "http://purl.obolibrary.org/obo//GO_0004016//" ) ) ); + assertEquals( "http://purl.obolibrary.org////", getLabelFromTermUri( URI.create( "http://purl.obolibrary.org////" ) ) ); + assertEquals( "PAT:ID_20327", getLabelFromTermUri( URI.create( "http://www.orphanet.org/rdfns#pat_id_20327" ) ) ); + assertEquals( "PAT:ID_20327", getLabelFromTermUri( URI.create( "http://www.orphanet.org/rdfns#pat_id_20327" ) ) ); + assertEquals( "63857", getLabelFromTermUri( URI.create( "http://purl.org/commons/record/ncbi_gene/63857" ) ) ); } } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java index c26027600d..7af0eb3e5a 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java @@ -62,7 +62,7 @@ public FactoryBean sessionFactory( DataSource dataSource ) { props.setProperty( "hibernate.dialect", H2Dialect.class.getName() ); props.setProperty( "hibernate.cache.use_second_level_cache", "false" ); props.setProperty( "hibernate.max_fetch_depth", "3" ); - props.setProperty( "hibernate.default_batch_fetch_size", "100" ); + props.setProperty( 
"hibernate.default_batch_fetch_size", "128" ); props.setProperty( "hibernate.jdbc.fetch_size", "128" ); props.setProperty( "hibernate.jdbc.batch_size", "32" ); props.setProperty( "hibernate.jdbc.batch_versioned_data", "true" ); @@ -126,8 +126,9 @@ public DataSourceInitializer( DataSource dataSource ) { @Override public void afterPropertiesSet() { JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/init-acls.sql" ), false ); + JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/init-entities.sql" ), false ); JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/h2/init-entities.sql" ), false ); - JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/h2/init-indices.sql" ), false ); + JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/init-data-slim.sql" ), false ); } } } diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/HibernateConfigTest.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/HibernateConfigTest.java index e38599fed2..f2097d704f 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/HibernateConfigTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/HibernateConfigTest.java @@ -76,7 +76,7 @@ public FactoryBean sessionFactory( DataSource dataSource ) { public void test() { Settings settings = ( ( SessionFactoryImpl ) sessionFactory ).getSettings(); assertEquals( 3, settings.getMaximumFetchDepth().intValue() ); - assertEquals( 100, settings.getDefaultBatchFetchSize() ); + assertEquals( 128, settings.getDefaultBatchFetchSize() ); assertEquals( 128, settings.getJdbcFetchSize().intValue() ); assertEquals( 32, settings.getJdbcBatchSize() ); assertTrue( settings.isJdbcBatchVersionedData() ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/Maps.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/Maps.java new file mode 100644 index 0000000000..0f2f8f4a61 --- 
/dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/Maps.java @@ -0,0 +1,23 @@ +package ubic.gemma.core.util.test; + +import org.springframework.util.Assert; + +import java.util.HashMap; +import java.util.Map; + +/** + * Extensions for AssertJ's {@link org.assertj.core.util.Maps}. + */ +public class Maps { + + public static Map map( K key, V value, Object... keyValues ) { + Assert.isTrue( keyValues.length % 2 == 0, "You must provide an even number of key-value pairs" ); + return new HashMap( 1 + keyValues.length / 2 ) {{ + put( key, value ); + for ( int i = 0; i < keyValues.length; i += 2 ) { + //noinspection unchecked + put( ( K ) keyValues[i], ( V ) keyValues[i + 1] ); + } + }}; + } +} diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastIntegrationTests.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastIntegrationTests.java index 0ba3d6fc1d..e155cb8b23 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastIntegrationTests.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastIntegrationTests.java @@ -3,11 +3,15 @@ import org.junit.experimental.categories.Categories; import org.junit.runner.RunWith; import org.junit.runners.Suite; +import ubic.gemma.core.util.test.category.IntegrationTest; import ubic.gemma.core.util.test.category.SlowTest; +/** + * Fast integration tests. 
+ */ @RunWith(Categories.class) -@Categories.IncludeCategory(IntegrationTests.class) +@Categories.IncludeCategory(IntegrationTest.class) @Categories.ExcludeCategory(SlowTest.class) -@Suite.SuiteClasses(AllTests.class) +@Suite.SuiteClasses({ AllTests.class }) public class FastIntegrationTests { } diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastUnitTests.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastUnitTests.java index a4b6e7fdf1..91f87e7bf9 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastUnitTests.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastUnitTests.java @@ -12,5 +12,5 @@ @RunWith(Categories.class) @Categories.ExcludeCategory({ IntegrationTest.class, SlowTest.class }) @Suite.SuiteClasses(AllTests.class) -public class FastUnitTests extends UnitTests { +public class FastUnitTests { } diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/IntegrationTests.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/IntegrationTests.java index b9308a1cb4..f741e7d04a 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/IntegrationTests.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/IntegrationTests.java @@ -5,6 +5,9 @@ import org.junit.runners.Suite; import ubic.gemma.core.util.test.category.IntegrationTest; +/** + * Integration tests. 
+ */ @RunWith(Categories.class) @Categories.IncludeCategory(IntegrationTest.class) @Suite.SuiteClasses({ AllTests.class }) diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/UnitTests.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/UnitTests.java index 1b65bd1edb..3b99da4f3d 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/UnitTests.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/UnitTests.java @@ -5,6 +5,9 @@ import org.junit.runners.Suite; import ubic.gemma.core.util.test.category.IntegrationTest; +/** + * Unit tests. + */ @RunWith(Categories.class) @Categories.ExcludeCategory(IntegrationTest.class) @Suite.SuiteClasses(AllTests.class) diff --git a/gemma-core/src/test/java/ubic/gemma/model/common/auditAndSecurity/AuditEventDaoImplTest.java b/gemma-core/src/test/java/ubic/gemma/model/common/auditAndSecurity/AuditEventServiceTest.java similarity index 97% rename from gemma-core/src/test/java/ubic/gemma/model/common/auditAndSecurity/AuditEventDaoImplTest.java rename to gemma-core/src/test/java/ubic/gemma/model/common/auditAndSecurity/AuditEventServiceTest.java index 4b74def6e4..bcdc7cd84f 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/common/auditAndSecurity/AuditEventDaoImplTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/common/auditAndSecurity/AuditEventServiceTest.java @@ -37,7 +37,7 @@ /** * @author pavlidis */ -public class AuditEventDaoImplTest extends BaseSpringContextTest { +public class AuditEventServiceTest extends BaseSpringContextTest { @Autowired private ArrayDesignService ads; diff --git a/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicServiceTest.java b/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicServiceTest.java index ee875a66ed..66f8ae450a 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicServiceTest.java +++ 
b/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicServiceTest.java @@ -91,7 +91,7 @@ public void setUp() throws Exception { fv.setCharacteristics( this.getTestPersistentStatements( 1 ) ); fvService.update( fv ); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); } @Test diff --git a/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicUtilsTest.java index 93e1c65cab..27feeef0ad 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicUtilsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicUtilsTest.java @@ -2,12 +2,39 @@ import org.junit.Test; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; +import static ubic.gemma.model.common.description.CharacteristicUtils.*; public class CharacteristicUtilsTest { @Test - public void test() { + public void testUncategorized() { + assertTrue( isUncategorized( createCharacteristic( null, null, null, null ) ) ); + assertFalse( isUncategorized( createCharacteristic( "a", null, null, null ) ) ); + } + + @Test + public void testIsFreeTextCategory() { + assertFalse( isFreeTextCategory( createCharacteristic( null, null, null, null ) ) ); + assertTrue( isFreeTextCategory( createCharacteristic( "a", null, null, null ) ) ); + } + + @Test + public void testIsFreeText() { + assertTrue( isFreeText( createCharacteristic( null, null, "foo", null ) ) ); + assertFalse( isFreeText( createCharacteristic( null, null, "foo", "bar" ) ) ); + } + + @Test + public void testEquals() { + assertTrue( CharacteristicUtils.equals( "a", "b", "a", "b" ) ); + assertTrue( CharacteristicUtils.equals( null, "b", "c", "b" ) ); + assertFalse( CharacteristicUtils.equals( null, "b", "c", "c" ) ); + assertTrue( 
CharacteristicUtils.equals( "A", null, "a", null ) ); + } + + @Test + public void testCompareTerm() { // terms with identical URIs are collapsed assertEquals( 0, CharacteristicUtils.compareTerm( "a", "test", "b", "test" ) ); // terms with different URIs are compared by label @@ -15,4 +42,13 @@ public void test() { assertEquals( 1, CharacteristicUtils.compareTerm( "b", "test", "a", "bar" ) ); } + private Characteristic createCharacteristic( String category, String categoryUri, String value, String valueUri ) { + Characteristic c = new Characteristic(); + c.setCategory( category ); + c.setCategoryUri( categoryUri ); + c.setValue( value ); + c.setValueUri( valueUri ); + return c; + } + } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/model/common/search/SearchSettingsTest.java b/gemma-core/src/test/java/ubic/gemma/model/common/search/SearchSettingsTest.java index d98ada02b9..471f887fc2 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/common/search/SearchSettingsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/common/search/SearchSettingsTest.java @@ -2,8 +2,10 @@ import org.junit.Test; import ubic.gemma.core.search.DefaultHighlighter; +import ubic.gemma.core.search.SearchException; import static org.assertj.core.api.Assertions.assertThat; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.prepareTermUriQuery; public class SearchSettingsTest { @@ -11,51 +13,23 @@ public class SearchSettingsTest { public void testSetQueryWhenQueryContainsBlankThenTrimAccordingly() { SearchSettings searchSettings = SearchSettings.builder().build(); searchSettings.setQuery( " " ); - assertThat( searchSettings.getQuery() ).isEqualTo( "" ); - assertThat( searchSettings.getRawQuery() ).isEqualTo( " " ); + assertThat( searchSettings.getQuery() ).isEqualTo( " " ); } @Test - public void testSetQueryWhenQueryIsNull() { - SearchSettings searchSettings = SearchSettings.builder().build(); - searchSettings.setQuery( null ); - assertThat( 
searchSettings.getQuery() ).isNull(); - assertThat( searchSettings.getRawQuery() ).isNull(); - } - - @Test - public void testSetQueryWhenQueryIsATermUri() { + public void testSetQueryWhenQueryIsATermUri() throws SearchException { SearchSettings searchSettings = SearchSettings.builder().build(); searchSettings.setQuery( "http://example.ca/" ); assertThat( searchSettings.getQuery() ).isEqualTo( "http://example.ca/" ); - assertThat( searchSettings.getRawQuery() ).isEqualTo( "http://example.ca/" ); - assertThat( searchSettings.isTermQuery() ).isTrue(); - assertThat( searchSettings.getTermUri() ).isEqualTo( "http://example.ca/" ); + assertThat( prepareTermUriQuery( searchSettings ) ).isNotNull().hasToString( "http://example.ca/" ); } @Test - public void testSetQueryWhenQueryIsATermUriWithTrailingBlanks() { + public void testSetQueryWhenQueryIsATermUriWithTrailingBlanks() throws SearchException { SearchSettings searchSettings = SearchSettings.builder().build(); searchSettings.setQuery( " http://example.ca/ " ); - assertThat( searchSettings.getQuery() ).isEqualTo( "http://example.ca/" ); - assertThat( searchSettings.getRawQuery() ).isEqualTo( " http://example.ca/ " ); - assertThat( searchSettings.isTermQuery() ).isTrue(); - assertThat( searchSettings.getTermUri() ).isEqualTo( "http://example.ca/" ); - } - - - @Test - public void testSetTermUriWhenUriIsBlank() { - SearchSettings searchSettings = SearchSettings.builder().build(); - searchSettings.setTermUri( "" ); - assertThat( searchSettings.isTermQuery() ).isFalse(); - } - - @Test - public void testSetTermUriWhenUriIsNull() { - SearchSettings searchSettings = SearchSettings.builder().build(); - searchSettings.setTermUri( null ); - assertThat( searchSettings.isTermQuery() ).isFalse(); + assertThat( searchSettings.getQuery() ).isEqualTo( " http://example.ca/ " ); + assertThat( prepareTermUriQuery( searchSettings ) ).isNotNull().hasToString( "http://example.ca/" ); } @Test diff --git 
a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java index 8f495236ae..19021f478d 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java @@ -31,7 +31,6 @@ import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.common.quantitationtype.QuantitationType; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector; @@ -430,8 +429,8 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { assertThat( c2.getNumberOfExpressionExperiments() ).isEqualTo( 1L ); }; - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); // add the term to the dataset and update the pivot table @@ -440,12 +439,12 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { assertThat( c.getId() ).isNotNull(); // the table is out-of-date - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); // 
update the pivot table - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, null, 0, null, 0 ) ) .satisfiesOnlyOnce( consumer ); // remove the term, which must evict the query cache @@ -458,7 +457,7 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { } ); // since deletions are cascaded, the change will be reflected immediatly - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); } diff --git a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java index b9354d4db5..b23be9635f 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java @@ -182,7 +182,7 @@ public void testGetFiltersWithCategories() { @Test public void testGetAnnotationsUsageFrequency() { - expressionExperimentService.getAnnotationsUsageFrequency( Filters.empty(), -1, 0, null, null, null, null ); + expressionExperimentService.getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, null, -1 ); verify( expressionExperimentDao ).getAnnotationsUsageFrequency( null, null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); } @@ -190,7 +190,7 @@ public void testGetAnnotationsUsageFrequency() { @Test public void 
testGetAnnotationsUsageFrequencyWithFilters() { Filters f = Filters.by( "c", "valueUri", String.class, Filter.Operator.eq, "http://example.com/T00001", "characteristics.valueUri" ); - expressionExperimentService.getAnnotationsUsageFrequency( f, -1, 0, null, null, null, null ); + expressionExperimentService.getAnnotationsUsageFrequency( f, null, null, null, null, 0, null, -1 ); verify( expressionExperimentDao ).loadIdsWithCache( f, null ); verify( expressionExperimentDao ).getAnnotationsUsageFrequency( Collections.emptyList(), null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/AbstractDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/AbstractDaoTest.java index 4483de2c45..c59b808f7e 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/AbstractDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/AbstractDaoTest.java @@ -4,19 +4,20 @@ import org.hibernate.FlushMode; import org.hibernate.Session; import org.hibernate.SessionFactory; +import org.hibernate.cfg.Settings; import org.hibernate.criterion.Restrictions; +import org.hibernate.engine.spi.SessionFactoryImplementor; import org.hibernate.metadata.ClassMetadata; import org.hibernate.proxy.HibernateProxy; import org.hibernate.proxy.LazyInitializer; import org.junit.Before; import org.junit.Test; -import org.mockito.internal.verification.VerificationModeFactory; import ubic.gemma.model.common.Identifiable; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.LongStream; import static org.mockito.Mockito.*; @@ -43,19 +44,22 @@ public MyDao( SessionFactory sessionFactory ) { } } - private SessionFactory sessionFactory; + private SessionFactoryImplementor sessionFactory; + private Settings settings; private Session session; - private MyDao 
myDao; @Before public void setUp() { session = mock( Session.class ); - sessionFactory = mock( SessionFactory.class ); + sessionFactory = mock( SessionFactoryImplementor.class ); ClassMetadata myEntityClassMetadata = mock( ClassMetadata.class ); when( myEntityClassMetadata.getIdentifierPropertyName() ).thenReturn( "id" ); when( myEntityClassMetadata.getMappedClass() ).thenReturn( MyEntity.class ); + settings = mock( Settings.class ); + when( settings.getDefaultBatchFetchSize() ).thenReturn( -1 ); when( sessionFactory.getClassMetadata( MyEntity.class ) ).thenReturn( myEntityClassMetadata ); when( sessionFactory.getCurrentSession() ).thenReturn( session ); + when( sessionFactory.getSettings() ).thenReturn( settings ); when( session.getFlushMode() ).thenReturn( FlushMode.AUTO ); } @@ -64,8 +68,8 @@ private static abstract class MyEntityProxy extends MyEntity implements Hibernat } @Test - public void testLoadByCollection() { - myDao = new MyDao( sessionFactory ); + public void testLoadByIds() { + MyDao myDao = new MyDao( sessionFactory ); Criteria mockCriteria = mock( Criteria.class ); when( mockCriteria.add( any() ) ).thenReturn( mockCriteria ); when( session.createCriteria( MyEntity.class ) ).thenReturn( mockCriteria ); @@ -86,7 +90,32 @@ public void testLoadByCollection() { verify( session ).load( MyEntity.class, 5L ); verify( session ).createCriteria( MyEntity.class ); verifyNoMoreInteractions( session ); - verify( mockCriteria ).add( argThat( criterion -> criterion.toString().equals( Restrictions.in( "id", ids ).toString() ) ) ); + verify( mockCriteria ).add( argThat( criterion -> criterion.toString().equals( Restrictions.in( "id", Arrays.asList( 1L, 2L, 3L, 4L, 5L, 5L, 5L, 5L ) ).toString() ) ) ); verify( mockCriteria ).list(); } + + @Test + public void testBatchLoadingByIds() { + when( settings.getDefaultBatchFetchSize() ).thenReturn( 128 ); + MyDao myDao = new MyDao( sessionFactory ); + Criteria mockCriteria = mock( Criteria.class ); + when( mockCriteria.add( 
any() ) ).thenReturn( mockCriteria ); + when( session.createCriteria( MyEntity.class ) ).thenReturn( mockCriteria ); + when( session.load( any( Class.class ), any() ) ).thenAnswer( a -> { + MyEntityProxy entity = mock( MyEntityProxy.class ); + LazyInitializer lazyInitializer = mock( LazyInitializer.class ); + when( lazyInitializer.isUninitialized() ).thenReturn( true ); + when( entity.getId() ).thenReturn( a.getArgument( 1 ) ); + when( entity.getHibernateLazyInitializer() ).thenReturn( lazyInitializer ); + return entity; + } ); + List ids = LongStream.range( 0, 1200 ).boxed().collect( Collectors.toList() ); + myDao.load( ids ); + verify( session, times( 1200 ) ).load( eq( MyEntity.class ), any() ); + verify( session, times( 10 ) ).createCriteria( MyEntity.class ); + verifyNoMoreInteractions( session ); + verify( mockCriteria, times( 10 ) ).add( any() ); + verify( mockCriteria, times( 10 ) ).list(); + verifyNoMoreInteractions( mockCriteria ); + } } diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilIntegrationTest.java index 6a5525418e..d81d6884bb 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilIntegrationTest.java @@ -60,19 +60,19 @@ public void testWhenUserIsAnonymous() { @Test @WithMockUser(authorities = "GROUP_AGENT") public void testUpdateExpressionExperiment2CharacteristicEntries() { - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( ExpressionExperiment.class ); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( BioMaterial.class ); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( ExperimentalDesign.class ); + 
tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( ExpressionExperiment.class, false ); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( BioMaterial.class, false ); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( ExperimentalDesign.class, false ); assertThatThrownBy( () -> { - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( FactorValue.class ); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( FactorValue.class, false ); } ).isInstanceOf( IllegalArgumentException.class ); } @Test(expected = AccessDeniedException.class) public void testUpdateEE2CAsUser() { this.runAsAnonymous(); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); } @Test diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilTest.java index 9b17dc9166..90a9bbecd8 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilTest.java @@ -44,7 +44,7 @@ static class TableMaintenanceUtilTestContextConfiguration { @Bean public static TestPropertyPlaceholderConfigurer propertyPlaceholderConfigurer() throws IOException { Path gene2csInfoPath = Files.createTempDirectory( "DBReport" ).resolve( "gene2cs.info" ); - return new TestPropertyPlaceholderConfigurer( "gemma.gene2cs.path=" + gene2csInfoPath, "gemma.admin.email=gemma" ); + return new TestPropertyPlaceholderConfigurer( "gemma.gene2cs.path=" + gene2csInfoPath ); } /** @@ -133,7 +133,7 @@ public void test() { verify( query ).executeUpdate(); verify( externalDatabaseService ).findByNameWithAuditTrail( "gene2cs" ); verify( 
externalDatabaseService ).updateReleaseLastUpdated( eq( gene2csDatabaseEntry ), eq( "" ), any() ); - verify( mailEngine ).send( any() ); + verify( mailEngine ).sendAdminMessage( any(), any() ); } @Test diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java index 50d19bcc2d..460a6ba3f4 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java @@ -61,7 +61,7 @@ static class CharacteristicDaoImplContextConfiguration extends BaseDatabaseTestC @Bean public static TestPropertyPlaceholderConfigurer propertyPlaceholderConfigurer() throws IOException { Path gene2csInfoPath = Files.createTempDirectory( "DBReport" ).resolve( "gene2cs.info" ); - return new TestPropertyPlaceholderConfigurer( "gemma.gene2cs.path=" + gene2csInfoPath, "gemma.admin.email=gemma" ); + return new TestPropertyPlaceholderConfigurer( "gemma.gene2cs.path=" + gene2csInfoPath ); } /** @@ -174,7 +174,7 @@ public void testFindExperimentsByUris() { acl.insertAce( 0, BasePermission.READ, new AclPrincipalSid( "bob" ), false ); aclService.updateAcl( acl ); - int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); assertThat( updated ).isEqualTo( 1 ); sessionFactory.getCurrentSession().flush(); // ranking by level uses the order by field() which is not supported @@ -201,7 +201,7 @@ public void testFindExperimentsByUrisAsAnonymous() { aclService.updateAcl( acl ); sessionFactory.getCurrentSession().flush(); - int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + int updated = 
tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); assertThat( updated ).isEqualTo( 1 ); sessionFactory.getCurrentSession().flush(); @@ -233,7 +233,7 @@ public void testFindExperimentsByUrisAsAdmin() { sessionFactory.getCurrentSession().persist( ee ); sessionFactory.getCurrentSession().flush(); aclService.createAcl( new AclObjectIdentity( ExpressionExperiment.class, ee.getId() ) ); - int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); assertThat( updated ).isEqualTo( 1 ); sessionFactory.getCurrentSession().flush(); // ranking by level uses the order by field() which is not supported diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java index db387a4607..5e16197ca1 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java @@ -16,6 +16,7 @@ import org.springframework.test.context.TestExecutionListeners; import ubic.gemma.core.util.test.BaseDatabaseTest; import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.common.description.CharacteristicUtils; import ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; @@ -30,10 +31,9 @@ import ubic.gemma.persistence.util.*; import javax.annotation.Nullable; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.LongStream; import static 
org.junit.Assert.*; @@ -173,8 +173,22 @@ public void testGetOriginalPlatformUsageFrequency() { @WithMockUser(authorities = "GROUP_ADMIN") public void testGetCategoriesWithUsageFrequency() { Characteristic c = createCharacteristic( "foo", "foo", "bar", "bar" ); - Assertions.assertThat( expressionExperimentDao.getCategoriesUsageFrequency( null, null, null, null ) ) - .containsEntry( c, 1L ); + Assertions.assertThat( expressionExperimentDao.getCategoriesUsageFrequency( null, null, null, null, -1 ) ) + .containsEntry( CharacteristicUtils.getCategory( c ), 1L ); + } + + @Test + @WithMockUser + public void testGetCategoriesUsageFrequencyAsAnonymous() { + expressionExperimentDao.getCategoriesUsageFrequency( null, null, null, null, -1 ); + } + + /** + * No ACL filtering is done when explicit IDs are provided, so this should work without {@link WithMockUser}. + */ + @Test + public void testGetCategoriesUsageFrequencyWithIds() { + expressionExperimentDao.getCategoriesUsageFrequency( Collections.singleton( 1L ), null, null, null, -1 ); } @Test @@ -185,6 +199,21 @@ public void testGetAnnotationUsageFrequency() { .containsEntry( c, 1L ); } + @Test + @WithMockUser + public void testGetAnnotationUsageFrequencyAsAnonymous() { + expressionExperimentDao.getAnnotationsUsageFrequency( null, null, 10, 1, null, null, null, null ); + } + + @Test + @WithMockUser(authorities = "GROUP_ADMIN") + public void testGetAnnotationUsageFrequencyWithLargeBatch() { + Characteristic c = createCharacteristic( "foo", "foo", "bar", "bar" ); + List ees = LongStream.range( 0, 10000 ).boxed().collect( Collectors.toList() ); + Assertions.assertThat( expressionExperimentDao.getAnnotationsUsageFrequency( ees, null, 10, 1, null, null, null, null ) ) + .containsEntry( c, 1L ); + } + @Test @WithMockUser(authorities = "GROUP_ADMIN") public void testGetAnnotationUsageFrequencyRetainMentionedTerm() { @@ -242,6 +271,14 @@ public void testGetAnnotationUsageFrequencyWithUncategorizedCategory() { .doesNotContainKey( c2 
); } + /** + * No ACL filtering is done when explicit IDs are provided, so this should work without {@link WithMockUser}. + */ + @Test + public void testGetAnnotationUsageFrequencyWithIds() { + expressionExperimentDao.getAnnotationsUsageFrequency( Collections.singleton( 1L ), null, 10, 1, null, null, null, null ); + } + private Characteristic createCharacteristic( @Nullable String category, @Nullable String categoryUri, String value, @Nullable String valueUri ) { ExpressionExperiment ee = new ExpressionExperiment(); sessionFactory.getCurrentSession().persist( ee ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/util/GenericValueObjectConverterTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/util/GenericValueObjectConverterTest.java deleted file mode 100644 index b5add26e12..0000000000 --- a/gemma-core/src/test/java/ubic/gemma/persistence/util/GenericValueObjectConverterTest.java +++ /dev/null @@ -1,72 +0,0 @@ -package ubic.gemma.persistence.util; - -import org.junit.Before; -import org.junit.Test; -import org.springframework.core.convert.ConverterNotFoundException; -import org.springframework.core.convert.support.ConfigurableConversionService; -import org.springframework.core.convert.support.GenericConversionService; -import ubic.gemma.model.IdentifiableValueObject; -import ubic.gemma.model.common.description.DatabaseEntry; -import ubic.gemma.model.common.description.DatabaseEntryValueObject; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; -import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject; - -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; - -public class GenericValueObjectConverterTest { - - private final ConfigurableConversionService converter = new GenericConversionService(); - - @Before - public void setUp() { - converter.addConverter( new GenericValueObjectConverter<>( DatabaseEntryValueObject::new, 
DatabaseEntry.class, DatabaseEntryValueObject.class ) ); - } - - @Test - public void test() { - Object converted = converter.convert( new DatabaseEntry(), DatabaseEntryValueObject.class ); - assertThat( converted ).isInstanceOf( DatabaseEntryValueObject.class ); - } - - @Test - public void testConvertToSuperClass() { - Object converted = converter.convert( new DatabaseEntry(), IdentifiableValueObject.class ); - assertThat( converted ).isInstanceOf( DatabaseEntryValueObject.class ); - } - - @Test - public void testConvertFromSubClass() { - Object converted = converter.convert( new SpecificDatabaseEntry(), DatabaseEntryValueObject.class ); - assertThat( converted ).isInstanceOf( DatabaseEntryValueObject.class ); - } - - private static class SpecificDatabaseEntry extends DatabaseEntry { - - } - - @Test - public void testConvertCollection() { - Object converted = converter.convert( Collections.singleton( new DatabaseEntry() ), List.class ); - assertThat( converted ).isInstanceOf( List.class ); - } - - @Test - public void testConvertCollectionToListSuperType() { - Object converted = converter.convert( Collections.singleton( new DatabaseEntry() ), Collection.class ); - assertThat( converted ).isInstanceOf( List.class ); - } - - @Test(expected = ConverterNotFoundException.class) - public void testConvertUnsupportedType() { - converter.convert( new ArrayDesign(), ArrayDesignValueObject.class ); - } - - @Test - public void testConvertNull() { - assertThat( converter.convert( null, DatabaseEntryValueObject.class ) ).isNull(); - } -} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/util/ListUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/util/ListUtilsTest.java index 0745b433b4..44e2d5cc03 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/util/ListUtilsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/util/ListUtilsTest.java @@ -4,6 +4,7 @@ import ubic.gemma.core.util.ListUtils; import 
java.util.Arrays; +import java.util.Collections; import java.util.Map; import static org.assertj.core.api.Assertions.assertThat; @@ -33,4 +34,12 @@ public void testIndexOfCaseInsensitiveStringElements() { assertThat( str2position.get( "A" ) ).isEqualTo( 0 ); assertThat( str2position.get( "baBa" ) ).isEqualTo( 2 ); } + + @Test + public void testPadToNextPowerOfTwo() { + assertThat( ListUtils.padToNextPowerOfTwo( Collections.emptyList(), null ) ).hasSize( 0 ); + assertThat( ListUtils.padToNextPowerOfTwo( Arrays.asList( 1L, 2L, 3L ), null ) ).hasSize( 4 ); + assertThat( ListUtils.padToNextPowerOfTwo( Arrays.asList( 1L, 2L, 3L, 4L ), null ) ).hasSize( 4 ); + assertThat( ListUtils.padToNextPowerOfTwo( Arrays.asList( 1L, 2L, 3L, 4L, 5L ), null ) ).hasSize( 8 ); + } } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/util/MailEngineTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/util/MailEngineTest.java index d867f29c04..fb845878c0 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/util/MailEngineTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/util/MailEngineTest.java @@ -6,21 +6,21 @@ import org.junit.Test; import org.mockito.ArgumentCaptor; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.config.PropertyPlaceholderConfigurer; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.mail.MailSender; import org.springframework.mail.SimpleMailMessage; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; +import ubic.gemma.core.util.test.TestPropertyPlaceholderConfigurer; import java.util.HashMap; import java.util.Map; import java.util.Properties; import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Mockito.mock; -import static 
org.mockito.Mockito.verify; -import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.*; @ContextConfiguration public class MailEngineTest extends AbstractJUnit4SpringContextTests { @@ -29,6 +29,14 @@ public class MailEngineTest extends AbstractJUnit4SpringContextTests { @TestComponent static class MailEngineTestContextConfiguration { + @Bean + public static PropertyPlaceholderConfigurer propertyPlaceholderConfigurer() { + return new TestPropertyPlaceholderConfigurer( + "gemma.admin.email=gemma@chibi.msl.ubc.ca", + "gemma.noreply.email=noreply@gemma.pavlab.msl.ubc.ca", + "gemma.support.email=pavlab-support@msl.ubc.ca" ); + } + @Bean public MailEngine mailEngine() { return new MailEngineImpl(); @@ -61,13 +69,13 @@ public void tearDown() { @Test public void test() { - mailEngine.sendAdminMessage( "test", "test subject" ); + mailEngine.sendAdminMessage( "test subject", "test" ); ArgumentCaptor captor = ArgumentCaptor.forClass( SimpleMailMessage.class ); verify( mailSender ).send( captor.capture() ); assertThat( captor.getValue() ) .isNotNull().satisfies( m -> { - assertThat( m.getTo() ).containsExactly( Settings.getAdminEmailAddress() ); - assertThat( m.getFrom() ).isEqualTo( Settings.getAdminEmailAddress() ); + assertThat( m.getTo() ).containsExactly( "gemma@chibi.msl.ubc.ca" ); + assertThat( m.getFrom() ).isEqualTo( "noreply@gemma.pavlab.msl.ubc.ca" ); assertThat( m.getSubject() ).isEqualTo( "test subject" ); assertThat( m.getText() ).isEqualTo( "test" ); } ); @@ -79,7 +87,7 @@ public void testSendMessageWithVelocityTemplate() { vars.put( "username", "foo" ); vars.put( "siteurl", "http://example.com/" ); vars.put( "confirmLink", "http://example.com/confirm?token=12ijdqwer9283" ); - mailEngine.sendMessage( new SimpleMailMessage(), "accountCreated.vm", vars ); + mailEngine.sendMessage( "test", "subject", "accountCreated.vm", vars ); ArgumentCaptor captor = ArgumentCaptor.forClass( SimpleMailMessage.class ); verify( mailSender ).send( 
captor.capture() ); assertThat( captor.getValue() ) diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/util/QueryUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/util/QueryUtilsTest.java new file mode 100644 index 0000000000..e86ee42c37 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/persistence/util/QueryUtilsTest.java @@ -0,0 +1,46 @@ +package ubic.gemma.persistence.util; + +import org.junit.Test; +import ubic.gemma.model.expression.arrayDesign.ArrayDesign; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Arrays; + +import static org.assertj.core.api.Assertions.assertThat; +import static ubic.gemma.persistence.util.QueryUtils.*; + +public class QueryUtilsTest { + + @Test + public void test() { + assertThat( optimizeParameterList( Arrays.asList( 1L, 2L, null, 0L ) ) ) + .containsExactly( 0L, 1L, 2L, null ); + } + + @Test + public void testIdentifiable() { + assertThat( optimizeIdentifiableParameterList( Arrays.asList( createArrayDesign( 2L ), + createArrayDesign( 1L ), createArrayDesign( 1L ), createArrayDesign( null ) ) ) ) + .extracting( ArrayDesign::getId ) + .containsExactly( 1L, 2L, null, null ); + } + + @Test + public void testBatchParameterList() { + assertThat( batchParameterList( new ArrayList(), 4 ) ) + .isEmpty(); + assertThat( batchParameterList( Arrays.asList( 1, 2, 3 ), 4 ) ) + .containsExactly( Arrays.asList( 1, 2, 3, 3 ) ); + assertThat( batchParameterList( Arrays.asList( 1, 2, 3, 4 ), 4 ) ) + .containsExactly( Arrays.asList( 1, 2, 3, 4 ) ); + assertThat( batchParameterList( Arrays.asList( 1, 2, 3, null, 4, 14, 23, 1 ), 4 ) ) + .containsExactly( Arrays.asList( 1, 2, 3, 4 ), Arrays.asList( 14, 23, null, null ) ); + } + + private ArrayDesign createArrayDesign( @Nullable Long id ) { + ArrayDesign ad = new ArrayDesign(); + ad.setId( id ); + return ad; + } +} \ No newline at end of file diff --git a/gemma-core/src/test/resources/sql/h2/init-entities.sql 
b/gemma-core/src/test/resources/sql/h2/init-entities.sql deleted file mode 100644 index fb4ff2c786..0000000000 --- a/gemma-core/src/test/resources/sql/h2/init-entities.sql +++ /dev/null @@ -1,72 +0,0 @@ -insert into AUDIT_TRAIL (ID) -values (1), - (2); - -insert into USER_GROUP (ID, AUDIT_TRAIL_FK, NAME, DESCRIPTION) -values (1, 1, 'Administrators', NULL), - (2, 2, 'Users', NULL); - -insert into CONTACT (ID, class, NAME, DESCRIPTION, EMAIL, LAST_NAME, USER_NAME, PASSWORD, PASSWORD_HINT, ENABLED, - SIGNUP_TOKEN, SIGNUP_TOKEN_DATESTAMP) -values (1, 'User', 'admin', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); - --- denormalized table joining genes and compositeSequences; maintained by TableMaintenanceUtil. -create table GENE2CS -( - GENE BIGINT not null, - CS BIGINT not null, - AD BIGINT not null, - primary key (AD, CS, GENE) -); -alter table GENE2CS - add constraint GENE2CS_ARRAY_DESIGN_FKC foreign key (AD) references ARRAY_DESIGN (ID) on update cascade on delete cascade; -alter table GENE2CS - add constraint GENE2CS_CS_FKC foreign key (CS) references COMPOSITE_SEQUENCE (ID) on update cascade on delete cascade; -alter table GENE2CS - add constraint GENE2CS_GENE_FKC foreign key (GENE) references CHROMOSOME_FEATURE (ID) on update cascade on delete cascade; - --- this table is created in the hibernate schema -drop table EXPRESSION_EXPERIMENT2CHARACTERISTIC; -create table EXPRESSION_EXPERIMENT2CHARACTERISTIC -( - ID bigint, - NAME varchar(255), - DESCRIPTION text, - CATEGORY varchar(255), - CATEGORY_URI varchar(255), - `VALUE` varchar(255), - VALUE_URI varchar(255), - ORIGINAL_VALUE varchar(255), - EVIDENCE_CODE varchar(255), - EXPRESSION_EXPERIMENT_FK bigint, - ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK int not null default 0, - LEVEL varchar(255), - primary key (ID, EXPRESSION_EXPERIMENT_FK) -); - -alter table EXPRESSION_EXPERIMENT2CHARACTERISTIC - add constraint EE2C_CHARACTERISTIC_FKC foreign key (ID) references CHARACTERISTIC (ID) on update cascade on delete 
cascade; -alter table EXPRESSION_EXPERIMENT2CHARACTERISTIC - add constraint EE2C_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade; - -create index EE2C_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (`VALUE`); -create index EE2C_CATEGORY on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY); -create index EE2C_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (VALUE_URI, `VALUE`); -create index EE2C_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY_URI, CATEGORY, VALUE_URI, `VALUE`); -create index EE2C_LEVEL on EXPRESSION_EXPERIMENT2CHARACTERISTIC (LEVEL); - -create table EXPRESSION_EXPERIMENT2ARRAY_DESIGN -( - EXPRESSION_EXPERIMENT_FK bigint not null, - ARRAY_DESIGN_FK bigint not null, - -- indicate if the platform is original (see BioAssay.originalPlatform) - IS_ORIGINAL_PLATFORM tinyint not null, - -- the permission mask of the EE for the anonymous SID - ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK int not null default 0, - primary key (EXPRESSION_EXPERIMENT_FK, ARRAY_DESIGN_FK, IS_ORIGINAL_PLATFORM) -); - -alter table EXPRESSION_EXPERIMENT2ARRAY_DESIGN - add constraint EE2AD_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade; -alter table EXPRESSION_EXPERIMENT2ARRAY_DESIGN - add constraint EE2AD_ARRAY_DESIGN_FKC foreign key (ARRAY_DESIGN_FK) references ARRAY_DESIGN (ID) on update cascade on delete cascade; diff --git a/gemma-core/src/test/resources/sql/h2/init-indices.sql b/gemma-core/src/test/resources/sql/h2/init-indices.sql deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/gemma-core/src/test/resources/sql/init-data-slim.sql b/gemma-core/src/test/resources/sql/init-data-slim.sql new file mode 100644 index 0000000000..17ec863f5c --- /dev/null +++ b/gemma-core/src/test/resources/sql/init-data-slim.sql @@ -0,0 +1,13 @@ +-- Slim version of init-data.sql 
for unit tests + +insert into AUDIT_TRAIL (ID) +values (1), + (2); + +insert into USER_GROUP (ID, AUDIT_TRAIL_FK, NAME, DESCRIPTION) +values (1, 1, 'Administrators', NULL), + (2, 2, 'Users', NULL); + +insert into CONTACT (ID, class, NAME, DESCRIPTION, EMAIL, LAST_NAME, USER_NAME, PASSWORD, PASSWORD_HINT, ENABLED, + SIGNUP_TOKEN, SIGNUP_TOKEN_DATESTAMP) +values (1, 'User', 'admin', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); \ No newline at end of file diff --git a/gemma-groovy-support/pom.xml b/gemma-groovy-support/pom.xml index 7be89bd33e..e86cae1aeb 100644 --- a/gemma-groovy-support/pom.xml +++ b/gemma-groovy-support/pom.xml @@ -6,7 +6,7 @@ gemma gemma - 1.31.2 + 1.31.3 gemma-groovy-support diff --git a/gemma-rest/pom.xml b/gemma-rest/pom.xml index 6a4cf4e2a0..ece62c2904 100644 --- a/gemma-rest/pom.xml +++ b/gemma-rest/pom.xml @@ -5,7 +5,7 @@ gemma gemma - 1.31.2 + 1.31.3 4.0.0 diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java index 683e13ab60..bab26d600f 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java @@ -32,14 +32,15 @@ import org.springframework.stereotype.Service; import ubic.gemma.core.expression.experiment.service.ExpressionExperimentSearchService; import ubic.gemma.core.ontology.OntologyService; +import ubic.gemma.core.search.ParseSearchException; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchService; +import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Taxon; -import 
ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.persistence.service.common.description.CharacteristicService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.Filters; @@ -117,8 +118,10 @@ public ResponseDataObject> searchAnnotat } try { return Responder.respond( this.getTerms( query ) ); + } catch ( ParseSearchException e ) { + throw new BadRequestException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new BadRequestException( "Invalid search query.", e ); + throw new InternalServerErrorException( e ); } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 6cbc923844..8043154da4 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -27,7 +27,7 @@ import org.apache.commons.io.FilenameUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.Highlighter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.annotation.Secured; import org.springframework.stereotype.Service; @@ -39,6 +39,7 @@ import ubic.gemma.core.analysis.preprocess.svd.SVDService; import ubic.gemma.core.analysis.preprocess.svd.SVDValueObject; import ubic.gemma.core.analysis.service.ExpressionDataFileService; +import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.DefaultHighlighter; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.lucene.SimpleMarkdownFormatter; @@ -102,6 +103,7 @@ public class DatasetsWebService { private static final String ERROR_DATA_FILE_NOT_AVAILABLE = "Data file for experiment %s can not be created."; private static final 
String ERROR_DESIGN_FILE_NOT_AVAILABLE = "Design file for experiment %s can not be created."; + private static final int MAX_DATASETS_CATEGORIES = 200; private static final int MAX_DATASETS_ANNOTATIONS = 5000; @Autowired @@ -124,6 +126,8 @@ public class DatasetsWebService { private GeneArgService geneArgService; @Autowired private QuantitationTypeArgService quantitationTypeArgService; + @Autowired + private OntologyService ontologyService; @Autowired private HttpServletRequest request; @@ -134,6 +138,7 @@ private class Highlighter extends DefaultHighlighter { private final Set documentIdsToHighlight; private Highlighter( Set documentIdsToHighlight ) { + super( new SimpleMarkdownFormatter() ); this.documentIdsToHighlight = documentIdsToHighlight; } @@ -152,18 +157,13 @@ public Map highlightTerm( @Nullable String termUri, String termL } @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return new org.apache.lucene.search.highlight.Highlighter( new SimpleMarkdownFormatter(), queryScorer ); - } - - @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { long id = Long.parseLong( document.get( "id" ) ); // TODO: maybe use a filter in the Lucene query? 
if ( !documentIdsToHighlight.contains( id ) ) { return Collections.emptyMap(); } - return super.highlightDocument( document, highlighter, analyzer, fields ); + return super.highlightDocument( document, highlighter, analyzer ); } } @@ -174,7 +174,7 @@ public Map highlightDocument( Document document, org.apache.luce @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Retrieve all datasets") public QueriedAndFilteredAndPaginatedResponseDataObject getDatasets( // Params: - @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filterArg, // Optional, default null @QueryParam("offset") @DefaultValue("0") OffsetArg offsetArg, // Optional, default 0 @QueryParam("limit") @DefaultValue("20") LimitArg limitArg, // Optional, default 20 @@ -185,9 +185,9 @@ public QueriedAndFilteredAndPaginatedResponseDataObject ids = new ArrayList<>( expressionExperimentService.loadIdsWithCache( filters, sort ) ); Map scoreById = new HashMap<>(); - Filters filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, scoreById ) ); - List ids = new ArrayList<>( expressionExperimentService.loadIdsWithCache( filtersWithQuery, sort ) ); + ids.retainAll( datasetArgService.getIdsForSearchQuery( query, scoreById ) ); // sort is stable, so the order of IDs with the same score is preserved ids.sort( Comparator.comparingDouble( i -> -scoreById.get( i ) ) ); @@ -207,7 +207,7 @@ public QueriedAndFilteredAndPaginatedResponseDataObject( vos, Sort.by( null, "searchResult.score", Sort.Direction.DESC ), offset, limit, ( long ) ids.size() ) .map( vo -> new ExpressionExperimentWithSearchResultValueObject( vo, resultById.get( vo.getId() ) ) ), - query, filters, new String[] { "id" } ); + query.getValue(), filters, new String[] { "id" } ); } else { return Responder.queryAndPaginate( expressionExperimentService.loadValueObjectsWithCache( filters, sort, offset, limit ).map( vo -> new 
ExpressionExperimentWithSearchResultValueObject( vo, null ) ), @@ -226,7 +226,7 @@ public static class ExpressionExperimentWithSearchResultValueObject extends Expr public ExpressionExperimentWithSearchResultValueObject( ExpressionExperimentValueObject vo, @Nullable SearchResult result ) { super( vo ); if ( result != null ) { - this.searchResult = new SearchWebService.SearchResultValueObject<>( SearchResult.from( result, null ) ); + this.searchResult = new SearchWebService.SearchResultValueObject<>( result.withResultObject( null ) ); } else { this.searchResult = null; } @@ -236,15 +236,19 @@ public ExpressionExperimentWithSearchResultValueObject( ExpressionExperimentValu @GET @Path("/count") @Produces(MediaType.APPLICATION_JSON) - @Operation(summary = "Count datasets matching the provided query and filter") + @Operation(summary = "Count datasets matching the provided query and filter") public ResponseDataObject getNumberOfDatasets( - @QueryParam("query") String query, - @QueryParam("filter") @DefaultValue("") FilterArg filter ) { + @QueryParam("query") QueryArg query, + @QueryParam("filter") @DefaultValue("") FilterArg filter + ) { Filters filters = datasetArgService.getFilters( filter ); + Set extraIds; if ( query != null ) { - filters.and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); + } else { + extraIds = null; } - return Responder.respond( expressionExperimentService.countWithCache( filters ) ); + return Responder.respond( expressionExperimentService.countWithCache( filters, extraIds ) ); } public interface UsageStatistics { @@ -260,19 +264,20 @@ public interface UsageStatistics { @Operation(summary = "Retrieve usage statistics of platforms among datasets matching the provided query and filter", description = "Usage statistics are aggregated across experiment tags, samples and factor values mentioned in the experimental design.") public LimitedResponseDataObject 
getDatasetsPlatformsUsageStatistics( - @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filter, - @QueryParam("limit") @DefaultValue("50") LimitArg limit ) { + @QueryParam("limit") @DefaultValue("50") LimitArg limit + ) { Filters filters = datasetArgService.getFilters( filter ); - Filters filtersWithQuery; + Set extraIds; if ( query != null ) { - filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); } else { - filtersWithQuery = filters; + extraIds = null; } Integer l = limit.getValueNoMaximum(); - Map tts = expressionExperimentService.getTechnologyTypeUsageFrequency( filtersWithQuery ); - Map ads = expressionExperimentService.getArrayDesignUsedOrOriginalPlatformUsageFrequency( filtersWithQuery, l ); + Map tts = expressionExperimentService.getTechnologyTypeUsageFrequency( filters, extraIds ); + Map ads = expressionExperimentService.getArrayDesignUsedOrOriginalPlatformUsageFrequency( filters, extraIds, l ); List adsVos = arrayDesignService.loadValueObjects( ads.keySet() ); Map countsById = ads.entrySet().stream().collect( Collectors.toMap( e -> e.getKey().getId(), Map.Entry::getValue ) ); List results = @@ -280,7 +285,7 @@ public LimitedResponseDataObject getD .map( e -> new ArrayDesignWithUsageStatisticsValueObject( e, countsById.get( e.getId() ), tts.getOrDefault( TechnologyType.valueOf( e.getTechnologyType() ), 0L ) ) ) .sorted( Comparator.comparing( UsageStatistics::getNumberOfExpressionExperiments, Comparator.reverseOrder() ) ) .collect( Collectors.toList() ); - return Responder.limit( results, query, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ), l ); + return Responder.limit( results, query != null ? 
query.getValue() : null, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ), l ); } @Value @@ -298,8 +303,9 @@ public static class CategoryWithUsageStatisticsValueObject implements UsageStati @Operation(summary = "Retrieve usage statistics of categories among datasets matching the provided query and filter", description = "Usage statistics are aggregated across experiment tags, samples and factor values mentioned in the experimental design.") public QueriedAndFilteredResponseDataObject getDatasetsCategoriesUsageStatistics( - @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filter, + @QueryParam("limit") @DefaultValue("20") LimitArg limit, @Parameter(description = "Excluded category URIs.", hidden = true) @QueryParam("excludedCategories") StringArrayArg excludedCategoryUris, @Parameter(description = "Exclude free-text categories (i.e. those with null URIs).", hidden = true) @QueryParam("excludeFreeTextCategories") @DefaultValue("false") Boolean excludeFreeTextCategories, @Parameter(description = "Excluded term URIs; this list is expanded with subClassOf inference.", hidden = true) @QueryParam("excludedTerms") StringArrayArg excludedTermUris, @@ -310,23 +316,26 @@ public QueriedAndFilteredResponseDataObject mentionedTerms = retainMentionedTerms ? 
new HashSet<>() : null; Filters filters = datasetArgService.getFilters( filter, mentionedTerms ); - Filters filtersWithQuery; + Set extraIds; if ( query != null ) { - filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); } else { - filtersWithQuery = filters; + extraIds = null; } + int maxResults = limit.getValue( MAX_DATASETS_CATEGORIES ); List results = expressionExperimentService.getCategoriesUsageFrequency( - filtersWithQuery, + filters, + extraIds, datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), - mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null ) + mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null, + maxResults ) .entrySet() .stream() .map( e -> new CategoryWithUsageStatisticsValueObject( e.getKey().getCategoryUri(), e.getKey().getCategory(), e.getValue() ) ) .sorted( Comparator.comparing( UsageStatistics::getNumberOfExpressionExperiments, Comparator.reverseOrder() ) ) .collect( Collectors.toList() ); - return Responder.queryAndFilter( results, query, filters, new String[] { "classUri", "className" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); + return Responder.queryAndFilter( results, query != null ? 
query.getValue() : null, filters, new String[] { "classUri", "className" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); } @Value @@ -354,7 +363,7 @@ public ArrayDesignWithUsageStatisticsValueObject( ArrayDesignValueObject arrayDe @Operation(summary = "Retrieve usage statistics of annotations among datasets matching the provided query and filter", description = "Usage statistics are aggregated across experiment tags, samples and factor values mentioned in the experimental design.") public LimitedResponseDataObject getDatasetsAnnotationsUsageStatistics( - @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filter, @Parameter(description = "List of fields to exclude from the payload. Only `parentTerms` can be excluded.") @QueryParam("exclude") ExcludeArg exclude, @Parameter(description = "Maximum number of annotations to returned; capped at " + MAX_DATASETS_ANNOTATIONS + ".", schema = @Schema(type = "integer", minimum = "1", maximum = "" + MAX_DATASETS_ANNOTATIONS)) @QueryParam("limit") LimitArg limitArg, @@ -375,31 +384,31 @@ public LimitedResponseDataObject getDa // ensure that implied terms are retained in the usage frequency Collection mentionedTerms = retainMentionedTerms ? 
new HashSet<>() : null; Filters filters = datasetArgService.getFilters( filter, mentionedTerms ); - Filters filtersWithQuery; + Set extraIds; if ( query != null ) { - filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); } else { - filtersWithQuery = filters; + extraIds = null; } if ( category != null && category.isEmpty() ) { category = ExpressionExperimentService.UNCATEGORIZED; } - // cache for visited parents (if two term share the same parent, we can save significant time generating the ancestors) - Map> visited = new HashMap<>(); List initialResults = expressionExperimentService.getAnnotationsUsageFrequency( - filtersWithQuery, - limit, - minFrequency != null ? minFrequency : 0, + filters, + extraIds, category, datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), - mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null ); + minFrequency != null ? minFrequency : 0, + mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null, + limit ); + // cache for visited parents (if two term share the same parent, we can save significant time generating the ancestors) + Map> visited = new HashMap<>(); List results = initialResults .stream() .map( e -> new AnnotationWithUsageStatisticsValueObject( e.getCharacteristic(), e.getNumberOfExpressionExperiments(), !excludeParentTerms && e.getTerm() != null ? 
getParentTerms( e.getTerm(), visited ) : null ) ) - .sorted( Comparator.comparing( UsageStatistics::getNumberOfExpressionExperiments, Comparator.reverseOrder() ) ) .collect( Collectors.toList() ); - return Responder.limit( results, query, filters, new String[] { "classUri", "className", "termUri", "termName" }, + return Responder.limit( results, query != null ? query.getValue() : null, filters, new String[] { "classUri", "className", "termUri", "termName" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ), limit ); } @@ -415,25 +424,31 @@ private Set getExcludedFields( @Nullable ExcludeArg( exclude.getValue() ); } - - private static Set getParentTerms( OntologyTerm c, Map> visited ) { - return c.getParents( true, false ).stream() - .map( t -> toTermVo( t, visited ) ) - .collect( Collectors.toSet() ); + private Set getParentTerms( OntologyTerm c, Map> visited ) { + return getParentTerms( c, new LinkedHashSet<>(), visited ); } - private static OntologyTermValueObject toTermVo( OntologyTerm ontologyTerm, Map> visited ) { - Set parentVos; - if ( visited.containsKey( ontologyTerm ) ) { - parentVos = visited.get( ontologyTerm ); - } else { - visited.put( ontologyTerm, Collections.emptySet() ); - parentVos = ontologyTerm.getParents( true, false ).stream() - .map( t -> toTermVo( t, visited ) ) - .collect( Collectors.toSet() ); - visited.put( ontologyTerm, parentVos ); - } - return new OntologyTermValueObject( ontologyTerm, parentVos ); + private Set getParentTerms( OntologyTerm c, LinkedHashSet stack, Map> visited ) { + return ontologyService.getParents( Collections.singleton( c ), true, true ).stream() + .map( t -> { + Set parentVos; + if ( stack.contains( t ) ) { + log.debug( "Detected a cycle when visiting " + t + ": " + stack.stream() + .map( ot -> ot.equals( t ) ? 
ot + "*" : ot.toString() ) + .collect( Collectors.joining( " -> " ) ) + " -> " + t + "*" ); + return null; + } else if ( visited.containsKey( t ) ) { + parentVos = visited.get( t ); + } else { + stack.add( t ); + parentVos = getParentTerms( t, stack, visited ); + stack.remove( t ); + visited.put( t, parentVos ); + } + return new OntologyTermValueObject( t, parentVos ); + } ) + .filter( Objects::nonNull ) + .collect( Collectors.toSet() ); } @Value @@ -465,7 +480,7 @@ public static class AnnotationWithUsageStatisticsValueObject extends AnnotationV Long numberOfExpressionExperiments; /** - * URIs of parent terms. + * URIs of parent terms, or null if excluded. */ @Nullable @JsonInclude(JsonInclude.Include.NON_NULL) @@ -485,18 +500,21 @@ public AnnotationWithUsageStatisticsValueObject( Characteristic c, Long numberOf @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Retrieve taxa usage statistics for datasets matching the provided query and filter") public QueriedAndFilteredResponseDataObject getDatasetsTaxaUsageStatistics( - @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filterArg ) { + @QueryParam("query") QueryArg query, + @QueryParam("filter") @DefaultValue("") FilterArg filterArg + ) { Filters filters = datasetArgService.getFilters( filterArg ); - Filters filtersWithQuery; + Set extraIds; if ( query != null ) { - filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); } else { - filtersWithQuery = filters; + extraIds = null; } - return Responder.queryAndFilter( expressionExperimentService.getTaxaUsageFrequency( filtersWithQuery ) + return Responder.queryAndFilter( expressionExperimentService.getTaxaUsageFrequency( filters, extraIds ) .entrySet().stream() + .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) .map( e -> new TaxonWithUsageStatisticsValueObject( e.getKey(), e.getValue() ) ) 
- .collect( Collectors.toList() ), query, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); + .collect( Collectors.toList() ), query != null ? query.getValue() : null, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); } @Value diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/PlatformsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/PlatformsWebService.java index f51386170a..ab76523dbf 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/PlatformsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/PlatformsWebService.java @@ -40,8 +40,10 @@ import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; /** * RESTful interface for platforms. @@ -268,7 +270,11 @@ public FilteredAndPaginatedResponseDataObject getPlatformElemen public Response getPlatformAnnotations( // Params: @PathParam("platform") PlatformArg platformArg // Optional, default null ) { - return outputAnnotationFile( arrayDesignArgService.getEntity( platformArg ) ); + try { + return outputAnnotationFile( arrayDesignArgService.getEntity( platformArg ) ); + } catch ( IOException e ) { + throw new InternalServerErrorException( e ); + } } /** @@ -277,7 +283,7 @@ public Response getPlatformAnnotations( // Params: * @param arrayDesign the platform to fetch and output the annotation file for. * @return a Response object containing the annotation file. 
*/ - private Response outputAnnotationFile( ArrayDesign arrayDesign ) { + private Response outputAnnotationFile( ArrayDesign arrayDesign ) throws IOException { String fileName = arrayDesign.getShortName().replaceAll( Pattern.quote( "/" ), "_" ) + ArrayDesignAnnotationService.STANDARD_FILE_SUFFIX + ArrayDesignAnnotationService.ANNOTATION_FILE_SUFFIX; @@ -293,8 +299,7 @@ private Response outputAnnotationFile( ArrayDesign arrayDesign ) { throw new NotFoundException( String.format( ERROR_ANNOTATION_FILE_NOT_AVAILABLE, arrayDesign.getShortName() ) ); } } - - return Response.ok( file ) + return Response.ok( new GZIPInputStream( new FileInputStream( file ) ) ) .header( "Content-Encoding", "gzip" ) .header( "Content-Disposition", "attachment; filename=" + FilenameUtils.removeExtension( file.getName() ) ) .build(); diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java index 94295d6842..aa76d54662 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java @@ -7,23 +7,18 @@ import io.swagger.v3.oas.annotations.media.Schema; import lombok.Value; import lombok.extern.apachecommons.CommonsLog; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.search.highlight.QueryScorer; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.MediaType; import org.springframework.stereotype.Service; import org.springframework.web.servlet.support.ServletUriComponentsBuilder; -import ubic.gemma.core.search.DefaultHighlighter; -import ubic.gemma.core.search.SearchException; -import ubic.gemma.core.search.SearchResult; -import ubic.gemma.core.search.SearchService; +import ubic.gemma.core.search.*; import 
ubic.gemma.core.search.lucene.SimpleMarkdownFormatter; import ubic.gemma.model.IdentifiableValueObject; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReferenceValueObject; +import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject; import ubic.gemma.model.expression.designElement.CompositeSequenceValueObject; @@ -32,7 +27,6 @@ import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.model.genome.gene.GeneSetValueObject; import ubic.gemma.model.genome.gene.GeneValueObject; -import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.genome.taxon.TaxonService; @@ -62,7 +56,7 @@ public class SearchWebService { /** - * Name used in the OpenAPI schema to identify result types as per {@link #search(String, TaxonArg, PlatformArg, List, LimitArg, ExcludeArg)}'s + * Name used in the OpenAPI schema to identify result types as per {@link #search(QueryArg, TaxonArg, PlatformArg, List, LimitArg, ExcludeArg)}'s * fourth argument. 
*/ public static final String RESULT_TYPES_SCHEMA_NAME = "SearchResultType"; @@ -99,6 +93,10 @@ private class Highlighter extends DefaultHighlighter { private int highlightedDocuments = 0; + public Highlighter() { + super( new SimpleMarkdownFormatter() ); + } + @Override public Map highlightTerm( @Nullable String uri, String label, String field ) { String searchUrl = ServletUriComponentsBuilder.fromRequest( request ) @@ -110,17 +108,12 @@ public Map highlightTerm( @Nullable String uri, String label, St } @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return new org.apache.lucene.search.highlight.Highlighter( new SimpleMarkdownFormatter(), queryScorer ); - } - - @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { if ( highlightedDocuments >= MAX_HIGHLIGHTED_DOCUMENTS ) { return Collections.emptyMap(); } highlightedDocuments++; - return super.highlightDocument( document, highlighter, analyzer, fields ); + return super.highlightDocument( document, highlighter, analyzer ); } } @@ -133,14 +126,16 @@ public Map highlightDocument( Document document, org.apache.luce @GZIP @Produces(MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Search everything in Gemma") - public SearchResultsResponseDataObject search( @QueryParam("query") String query, + public SearchResultsResponseDataObject search( + @QueryParam("query") QueryArg query, @QueryParam("taxon") TaxonArg taxonArg, @QueryParam("platform") PlatformArg platformArg, @Parameter(array = @ArraySchema(schema = @Schema(name = RESULT_TYPES_SCHEMA_NAME, hidden = true))) @QueryParam("resultTypes") List resultTypes, @Parameter(description = "Maximum number of search results to return; capped at " + MAX_SEARCH_RESULTS + " 
unless `resultObject` is excluded.", schema = @Schema(type = "integer", minimum = "1", maximum = "" + MAX_SEARCH_RESULTS)) @QueryParam("limit") LimitArg limit, - @Parameter(description = "List of fields to exclude from the payload. Only `resultObject` is supported.") @QueryParam("exclude") ExcludeArg> excludeArg ) { - if ( StringUtils.isBlank( query ) ) { - throw new BadRequestException( "A non-empty query must be supplied." ); + @Parameter(description = "List of fields to exclude from the payload. Only `resultObject` is supported.") @QueryParam("exclude") ExcludeArg> excludeArg + ) { + if ( query == null ) { + throw new BadRequestException( "A query must be supplied." ); } Map> supportedResultTypesByName = searchService.getSupportedResultTypes().stream() .collect( Collectors.toMap( Class::getName, identity() ) ); @@ -167,7 +162,7 @@ public SearchResultsResponseDataObject search( @QueryParam("query") String query } SearchSettings searchSettings = SearchSettings.builder() - .query( query ) + .query( query.getValue() ) .taxon( taxonArg != null ? taxonArgService.getEntity( taxonArg ) : null ) .platformConstraint( platformArg != null ? 
platformArgService.getEntity( platformArg ) : null ) .resultTypes( resultTypesCls ) @@ -179,8 +174,10 @@ public SearchResultsResponseDataObject search( @QueryParam("query") String query List> searchResults; try { searchResults = searchService.search( searchSettings ).toList(); + } catch ( ParseSearchException e ) { + throw new BadRequestException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new BadRequestException( String.format( "Invalid search settings: %s.", ExceptionUtils.getRootCauseMessage( e ) ), e ); + throw new InternalServerErrorException( e ); } List>> searchResultVos; @@ -190,7 +187,7 @@ public SearchResultsResponseDataObject search( @QueryParam("query") String query searchResultVos = searchService.loadValueObjects( searchResults ); } else { searchResultVos = searchResults.stream() - .map( sr -> SearchResult.from( sr, ( IdentifiableValueObject ) null ) ) + .map( sr -> sr.withResultObject( ( IdentifiableValueObject ) null ) ) .collect( Collectors.toList() ); } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java b/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java index 676f1dc116..e04e33745a 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java @@ -14,25 +14,32 @@ import io.swagger.v3.oas.models.security.SecurityRequirement; import lombok.Value; import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.io.IOUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.MessageSource; import org.springframework.context.MessageSourceResolvable; +import org.springframework.core.io.ClassPathResource; import org.springframework.security.access.ConfigAttribute; import org.springframework.stereotype.Component; +import 
org.springframework.util.StringUtils; import ubic.gemma.core.search.SearchService; import ubic.gemma.model.common.Identifiable; import ubic.gemma.rest.SearchWebService; import ubic.gemma.rest.util.args.*; import javax.annotation.Nullable; +import java.io.IOException; import java.lang.annotation.Annotation; +import java.nio.charset.StandardCharsets; import java.util.*; import java.util.stream.Collectors; +import static org.apache.commons.text.StringEscapeUtils.escapeHtml4; + /** * Resolve {@link Arg} parameters' schema. - * + *

* This should always be added last with {@link ModelConverters#addConverter(ModelConverter)} to take priority as it * addresses a glitch in the original {@link ModelResolver}. * @@ -44,6 +51,12 @@ public class CustomModelResolver extends ModelResolver { private final SearchService searchService; + @Autowired + private List> entityArgServices; + + @Autowired + private MessageSource messageSource; + @Autowired public CustomModelResolver( @Qualifier("swaggerObjectMapper") ObjectMapper objectMapper, SearchService searchService ) { super( objectMapper ); @@ -60,7 +73,7 @@ public Schema resolve( AnnotatedType type, ModelConverterContext context, Iterat } if ( t.isTypeOrSubTypeOf( FilterArg.Filter.class ) || t.isTypeOrSubTypeOf( SortArg.Sort.class ) ) { return null; // ignore those... - } else if ( t.isTypeOrSubTypeOf( FilterArg.class ) || t.isTypeOrSubTypeOf( SortArg.class ) ) { + } else if ( t.isTypeOrSubTypeOf( FilterArg.class ) || t.isTypeOrSubTypeOf( SortArg.class ) || t.isTypeOrSubTypeOf( QueryArg.class ) ) { Schema resolved = super.resolve( type, context, chain ); String ref = resolved.get$ref(); // FilterArg and SortArg schemas in parameters are refs to globally-defined schemas and those are @@ -76,7 +89,7 @@ public Schema resolve( AnnotatedType type, ModelConverterContext context, Iterat // definitions in the class's Schema annotation Schema resolvedSchema = super.resolve( new AnnotatedType( t.getRawClass() ), context, chain ); // There's a bug with abstract class such as TaxonArg and GeneArg that result in the schema containing 'type' - // and 'properties' fields instead of solely emiting the oneOf + // and 'properties' fields instead of solely emitting the oneOf if ( t.isAbstract() ) { return resolvedSchema.type( null ).properties( null ); } else { @@ -88,7 +101,7 @@ public Schema resolve( AnnotatedType type, ModelConverterContext context, Iterat } /** - * Resolves allowed values for the {@link ubic.gemma.rest.SearchWebService#search(String, TaxonArg, 
PlatformArg, List, LimitArg, ExcludeArg)} + * Resolves allowed values for the {@link ubic.gemma.rest.SearchWebService#search(QueryArg, TaxonArg, PlatformArg, List, LimitArg, ExcludeArg)} * resultTypes argument. *

* This ensures that the OpenAPI specification exposes all supported search result types in the {@link SearchService} as @@ -112,6 +125,20 @@ protected String resolveDescription( Annotated a, Annotation[] annotations, io.s return description == null ? availableProperties : description + "\n\n" + availableProperties; } + if ( a != null && QueryArg.class.isAssignableFrom( a.getRawType() ) ) { + try { + return ( description != null ? description + "\n\n" : "" ) + + IOUtils.toString( new ClassPathResource( "/restapidocs/fragments/QueryType.md" ).getInputStream(), StandardCharsets.UTF_8 ) + // this part of the template is using embedded HTML in Markdown + .replace( "{searchableProperties}", getSearchableProperties().entrySet().stream() + .map( e -> "

" + escapeHtml4( e.getKey() ) + "

" + + "
    " + e.getValue().stream().map( v -> "
  • " + escapeHtml4( v ) + "
  • " ).collect( Collectors.joining() ) + "
" ) + .collect( Collectors.joining() ) ); + } catch ( IOException e ) { + throw new RuntimeException( e ); + } + } + return description; } @@ -123,11 +150,28 @@ protected Map resolveExtensions( Annotated a, Annotation[] annot extensions.put( "x-gemma-filterable-properties", resolveAvailableProperties( a ) ); extensions = Collections.unmodifiableMap( extensions ); } + if ( a != null && QueryArg.class.isAssignableFrom( a.getRawType() ) ) { + extensions = extensions != null ? new HashMap<>( extensions ) : new HashMap<>(); + extensions.put( "x-gemma-searchable-properties", getSearchableProperties() ); + extensions = Collections.unmodifiableMap( extensions ); + } return extensions; } - @Autowired - private List> entityArgServices; + private final Comparator FIELD_COMPARATOR = Comparator + .comparing( ( String s ) -> StringUtils.countOccurrencesOf( s, "." ), Comparator.naturalOrder() ) + .thenComparing( s -> s ); + + private Map> getSearchableProperties() { + Map> sp = new HashMap<>(); + for ( Class resultType : searchService.getSupportedResultTypes() ) { + List fields = searchService.getFields( resultType ).stream().sorted( FIELD_COMPARATOR ).collect( Collectors.toList() ); + if ( !fields.isEmpty() ) { + sp.put( resultType.getName(), fields ); + } + } + return sp; + } @Value private static class FilterablePropMeta { @@ -149,8 +193,6 @@ private static class FilterablePropMetaAllowedValue { String label; } - @Autowired - private MessageSource messageSource; private List resolveAvailableProperties( Annotated a ) { // this is the case for FilterArg and SortArg diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java index 0cad88745d..473f38a338 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java @@ -1,15 +1,12 @@ package ubic.gemma.rest.util.args; -import 
org.apache.commons.lang3.StringUtils; +import lombok.extern.apachecommons.CommonsLog; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.analysis.preprocess.OutlierDetails; import ubic.gemma.core.analysis.preprocess.OutlierDetectionService; -import ubic.gemma.core.search.Highlighter; -import ubic.gemma.core.search.SearchException; -import ubic.gemma.core.search.SearchResult; -import ubic.gemma.core.search.SearchService; +import ubic.gemma.core.search.*; import ubic.gemma.model.common.description.AnnotationValueObject; import ubic.gemma.model.common.quantitationtype.QuantitationTypeValueObject; import ubic.gemma.model.common.search.SearchSettings; @@ -27,10 +24,12 @@ import javax.annotation.Nullable; import javax.ws.rs.BadRequestException; +import javax.ws.rs.InternalServerErrorException; import java.util.*; import java.util.stream.Collectors; @Service +@CommonsLog public class DatasetArgService extends AbstractEntityArgService { private final SearchService searchService; @@ -105,20 +104,19 @@ public Filters getFilters( FilterArg filterArg, @Nullable * @param highlighter a highlighter to use for the query or null to ignore * @throws BadRequestException if the query is empty */ - public List> getResultsForSearchQuery( String query, @Nullable Highlighter highlighter ) throws BadRequestException { - if ( StringUtils.isBlank( query ) ) { - throw new BadRequestException( "A non-empty query must be supplied." 
); - } + public List> getResultsForSearchQuery( QueryArg query, @Nullable Highlighter highlighter ) throws BadRequestException { try { SearchSettings settings = SearchSettings.builder() - .query( query ) + .query( query.getValue() ) .resultType( ExpressionExperiment.class ) .highlighter( highlighter ) .fillResults( false ) .build(); return searchService.search( settings ).getByResultObjectType( ExpressionExperiment.class ); + } catch ( ParseSearchException e ) { + throw new MalformedArgException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new MalformedArgException( "Invalid search query.", e ); + throw new InternalServerErrorException( e ); } } @@ -131,19 +129,14 @@ public List> getResultsForSearchQuery( String * @param scoreById if non-null, a destination for storing the scores by result ID * @throws BadRequestException if the query is empty */ - public Filter getFilterForSearchQuery( String query, @Nullable Map scoreById ) throws BadRequestException { + public Set getIdsForSearchQuery( QueryArg query, @Nullable Map scoreById ) throws BadRequestException { List> _results = getResultsForSearchQuery( query, null ); if ( scoreById != null ) { for ( SearchResult result : _results ) { scoreById.put( result.getResultId(), result.getScore() ); } } - Set ids = _results.stream().map( SearchResult::getResultId ).collect( Collectors.toSet() ); - if ( ids.isEmpty() ) { - return service.getFilter( "id", Long.class, Filter.Operator.eq, -1L ); - } else { - return service.getFilter( "id", Long.class, Filter.Operator.in, ids ); - } + return _results.stream().map( SearchResult::getResultId ).collect( Collectors.toSet() ); } /** diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java new file mode 100644 index 0000000000..7333835874 --- /dev/null +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java @@ -0,0 +1,32 @@ +package ubic.gemma.rest.util.args; + +import 
io.swagger.v3.oas.annotations.ExternalDocumentation; +import io.swagger.v3.oas.annotations.media.Schema; +import org.apache.commons.lang.StringUtils; +import ubic.gemma.rest.util.MalformedArgException; + +@Schema(type = "string", description = "Filter results matching the given full-text query.", + externalDocs = @ExternalDocumentation(url = "https://lucene.apache.org/core/3_6_2/queryparsersyntax.html")) +public class QueryArg implements Arg { + + private final String value; + + private QueryArg( String value ) { + this.value = value; + } + + @Override + public String getValue() { + return value; + } + + /** + * @throws MalformedArgException if the query string is blank + */ + public static QueryArg valueOf( String s ) throws MalformedArgException { + if ( StringUtils.isBlank( s ) ) { + throw new MalformedArgException( "The query cannot be empty." ); + } + return new QueryArg( s ); + } +} diff --git a/gemma-rest/src/main/resources/openapi-configuration.yaml b/gemma-rest/src/main/resources/openapi-configuration.yaml index fbcff0f1fe..9c342d32ba 100644 --- a/gemma-rest/src/main/resources/openapi-configuration.yaml +++ b/gemma-rest/src/main/resources/openapi-configuration.yaml @@ -8,7 +8,7 @@ openAPI: url: https://dev.gemma.msl.ubc.ca/rest/v2 info: title: Gemma RESTful API - version: 2.7.2 + version: 2.7.3 description: | This website documents the usage of the [Gemma RESTful API](https://gemma.msl.ubc.ca/rest/v2/). 
Here you can find example script usage of the API, as well as graphical interface for each endpoint, with description of its diff --git a/gemma-rest/src/main/resources/restapidocs/CHANGELOG.md b/gemma-rest/src/main/resources/restapidocs/CHANGELOG.md index c7e1801e5d..3118e023b5 100644 --- a/gemma-rest/src/main/resources/restapidocs/CHANGELOG.md +++ b/gemma-rest/src/main/resources/restapidocs/CHANGELOG.md @@ -1,5 +1,27 @@ ## Updates +### Update 2.7.3 + +- fix double-gzipping for the `getPlatformAnnotations` endpoint +- add a limit argument to `getDatasetCategoriesUsageStatistics` with a default value of 200 +- more parent terms are now included in `getDatasetAnnotationsUsageFrequency` +- search is much more efficient and now capable of handling more advanced syntax + +#### More free-text categories + +We've backfilled thousands of free-text categories from GEO sample metadata which resulted in +the `getDatasetCategoriesUsageFrequency` endpoint producing far more results than usual. This is now being alleviated +by a new `limit` parameter with a default value of 200. + +#### Complete inference for parent terms in `getDatasetAnnotationsUsageFrequency` + +The `getDatasetAnnotationsUsageFrequency` endpoint now includes parent terms that satisfy the `hasPart` relation. We've +rewritten the logic under the hood to be much more efficient and cache frequently requested terms. + +#### Advanced search syntax + +The search endpoint and individual query parameters now support an advanced search syntax provided by Lucene. + ### Update 2.7.2 Expose statements in `FactorValueValueObject` and `FactorValueBasicValueObject`. 
diff --git a/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md b/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md new file mode 100644 index 0000000000..936ccea18b --- /dev/null +++ b/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md @@ -0,0 +1,15 @@ +The search query accepts the following syntax: + +| | | | +|--------------|-----------------------------|------------------------------------------------------------------------------------------------------------------------| +| Conjunction | `alpha AND beta AND gamma` | Results must contain "alpha", "beta" and "gamma". | +| Disjunction | `alpha OR beta OR gamma` | Results must contain either "alpha", "beta" or "gamma". This is the default when multiple terms are supplied. | +| Grouping | `(alpha OR beta) AND gamma` | Results must contain one of "alpha" or "beta" and also "gamma". | +| Exact Search | `"alpha beta gamma"` | Results must contain the exact phrase "alpha beta gamma". | +| Field | `shortName:GSE00001` | Results with short name GSE00001.
List of supported fields{searchableProperties}
| +| Prefix | `alpha*` | Results must start with "alpha". | +| Wildcard | `BRCA?` | Results can contain any letter for the `?`. In this example, BRCA1 and BRCA2 would be matched. | +| Fuzzy | `alpha~` | Results can approximate "alpha". In this example, "aleph" would be accepted. | +| Boosting | `alpha^2 beta` | Results mentioning "alpha" are ranked higher than those containing only "beta". | +| Require | `+alpha beta` | Results must mention "alpha" and optionally "beta". | +| Escape | `\+alpha` | Results must mention "+alpha". Any special character from the search syntax can be escaped by prepending it with "\". | diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java index 767bf5c08c..fcaec45866 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java @@ -126,7 +126,7 @@ public void testSearchTaxonDatasets() throws SearchException { ee.setId( 1L ); SearchService.SearchResultMap mockedSrMap = mock( SearchService.SearchResultMap.class ); when( mockedSrMap.getByResultObjectType( ExpressionExperiment.class ) ) - .thenReturn( Collections.singletonList( SearchResult.from( ExpressionExperiment.class, ee, 1.0, "test object" ) ) ); + .thenReturn( Collections.singletonList( SearchResult.from( ExpressionExperiment.class, ee, 1.0, null, "test object" ) ) ); when( searchService.search( any( SearchSettings.class ) ) ) .thenReturn( mockedSrMap ); when( taxonService.getFilter( eq( "commonName" ), eq( String.class ), eq( Filter.Operator.eq ), any( String.class ) ) ) diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index b160160c72..15cc416a1b 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ 
b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -14,6 +14,7 @@ import ubic.gemma.core.analysis.preprocess.OutlierDetectionService; import ubic.gemma.core.analysis.preprocess.svd.SVDService; import ubic.gemma.core.analysis.service.ExpressionDataFileService; +import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchService; @@ -122,6 +123,11 @@ public AnalyticsProvider analyticsProvider() { public AccessDecisionManager accessDecisionManager() { return mock( AccessDecisionManager.class ); } + + @Bean + public OntologyService ontologyService() { + return mock(); + } } @Autowired @@ -214,20 +220,19 @@ public void testGetDatasetsWithQuery() throws SearchException { assertThat( s.isFillResults() ).isFalse(); assertThat( s.getHighlighter() ).isNotNull(); } ); - verify( expressionExperimentService ).getFilter( "id", Long.class, Filter.Operator.in, new HashSet<>( ids ) ); - verify( expressionExperimentService ).loadIdsWithCache( Filters.by( "ee", "id", Long.class, Filter.Operator.in, new HashSet<>( ids ) ), Sort.by( "ee", "id", Sort.Direction.ASC ) ); + verify( expressionExperimentService ).loadIdsWithCache( Filters.empty(), Sort.by( "ee", "id", Sort.Direction.ASC ) ); verify( expressionExperimentService ).loadValueObjectsByIdsWithRelationsAndCache( ids ); } @Test public void testGetDatasetsWithEmptyQuery() { - assertThat( target( "/datasets" ).queryParam( "query", "" ).request().get() ) + assertThat( target( "/datasets" ).queryParam( "query", " " ).request().get() ) .hasStatus( Response.Status.BAD_REQUEST ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); } private SearchResult createMockSearchResult( Long id ) { - return SearchResult.from( ExpressionExperiment.class, id, 0, "test result object" ); + return SearchResult.from( ExpressionExperiment.class, id, 0, null, "test result object" ); } @Test @@ -278,7 +283,7 @@ 
public void testGetDatasetsPlatformsUsageStatistics() { .hasEncoding( "gzip" ); verify( expressionExperimentService ).getFilter( "id", Filter.Operator.lessThan, "10" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.by( f ), null ); - verify( expressionExperimentService ).getArrayDesignUsedOrOriginalPlatformUsageFrequency( Filters.by( f ), 50 ); + verify( expressionExperimentService ).getArrayDesignUsedOrOriginalPlatformUsageFrequency( Filters.by( f ), null, 50 ); } @Test @@ -294,7 +299,7 @@ public void testGetDatasetsAnnotationsWithRetainMentionedTerms() { .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), Collections.emptySet() ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 100, 0, null, null, null, Collections.emptySet() ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, Collections.emptySet(), 100 ); } @Test @@ -310,7 +315,7 @@ public void testGetDatasetsAnnotations() { .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), null ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 100, 0, null, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, null, 100 ); } @Test @@ -329,7 +334,7 @@ public void testGetDatasetsAnnotationsWhenMaxFrequencyIsSuppliedLimitMustUseMaxi .entity() .hasFieldOrPropertyWithValue( "limit", 5000 ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), null ); - verify( expressionExperimentService 
).getAnnotationsUsageFrequency( Filters.empty(), 5000, 10, null, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 10, null, 5000 ); } @Test @@ -341,7 +346,7 @@ public void testGetDatasetsAnnotationsWithLimitIsSupplied() { .hasFieldOrPropertyWithValue( "limit", 50 ) .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 50, 0, null, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, null, 50 ); } @Test @@ -349,7 +354,7 @@ public void testGetDatasetsAnnotationsForUncategorizedTerms() { assertThat( target( "/datasets/annotations" ).queryParam( "category", "" ).request().get() ) .hasStatus( Response.Status.OK ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 100, 0, ExpressionExperimentService.UNCATEGORIZED, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, ExpressionExperimentService.UNCATEGORIZED, null, null, 0, null, 100 ); } @Test @@ -357,7 +362,7 @@ public void testGetDatasetsCategories() { assertThat( target( "/datasets/categories" ).request().get() ) .hasStatus( Response.Status.OK ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); - verify( expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null ); + verify( expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null, null, 20 ); } @Test diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java index e923d6037e..f9b050fbdf 100644 --- 
a/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java @@ -8,6 +8,7 @@ import io.swagger.v3.oas.models.OpenAPI; import io.swagger.v3.oas.models.media.Schema; import lombok.Data; +import org.assertj.core.api.Assertions; import org.junit.Before; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -49,8 +50,8 @@ public class OpenApiTest extends BaseJerseyTest { static class OpenApiTestContextConfiguration { @Bean - public CustomModelResolver customModelResolver() { - return new CustomModelResolver( Json.mapper(), mock( SearchService.class ) ); + public CustomModelResolver customModelResolver( SearchService searchService ) { + return new CustomModelResolver( Json.mapper(), searchService ); } @Bean @@ -99,6 +100,9 @@ public AccessDecisionManager accessDecisionManager() { @Autowired private CustomModelResolver customModelResolver; + @Autowired + private SearchService searchService; + @Autowired @Qualifier("swaggerObjectMapper") private ObjectMapper objectMapper; @@ -107,6 +111,8 @@ public AccessDecisionManager accessDecisionManager() { @Before public void setUpSpec() throws IOException { + when( searchService.getSupportedResultTypes() ).thenReturn( Collections.singleton( ExpressionExperiment.class ) ); + when( searchService.getFields( ExpressionExperiment.class ) ).thenReturn( Collections.singleton( "shortName" ) ); // FIXME: this is normally initialized in the servlet ModelConverters.getInstance().addConverter( customModelResolver ); Response response = target( "/openapi.json" ).request().get(); @@ -170,4 +176,22 @@ public void testLimitArgIs5000ForGetDatasetsAnnotations() { assertThat( p.getSchema().getMaximum() ).isEqualTo( "5000" ); } ); } + + @Test + public void testSearchableProperties() { + assertThat( spec.getPaths().get( "/search" ).getGet().getParameters() ) + .anySatisfy( p -> { + assertThat( p.getName() ).isEqualTo( "query" ); + assertThat( 
p.getSchema().get$ref() ).isEqualTo( "#/components/schemas/QueryArg" ); + } ); + assertThat( spec.getComponents().getSchemas().get( "QueryArg" ) ).satisfies( s -> { + assertThat( s.getType() ).isEqualTo( "string" ); + //noinspection unchecked + assertThat( s.getExtensions() ) + .isNotNull() + .containsEntry( "x-gemma-searchable-properties", Collections.singletonMap( ExpressionExperiment.class.getName(), Collections.singletonList( "shortName" ) ) ); + assertThat( s.getExternalDocs().getUrl() ) + .isEqualTo( "https://lucene.apache.org/core/3_6_2/queryparsersyntax.html" ); + } ); + } } diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java index 7d8df3f2cd..df8c34f41e 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java @@ -1,5 +1,9 @@ package ubic.gemma.rest; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.util.Version; +import org.hibernate.search.util.impl.PassThroughAnalyzer; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -7,14 +11,16 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.security.access.AccessDecisionManager; import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; -import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; import org.springframework.test.context.web.WebAppConfiguration; import ubic.gemma.core.genome.gene.service.GeneService; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import 
ubic.gemma.core.search.SearchService; +import ubic.gemma.core.search.lucene.LuceneParseSearchException; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.genome.Gene; @@ -26,10 +32,15 @@ import ubic.gemma.persistence.service.genome.ChromosomeService; import ubic.gemma.persistence.service.genome.taxon.TaxonService; import ubic.gemma.persistence.util.TestComponent; +import ubic.gemma.rest.analytics.AnalyticsProvider; +import ubic.gemma.rest.util.Assertions; +import ubic.gemma.rest.util.BaseJerseyTest; +import ubic.gemma.rest.util.JacksonConfig; import ubic.gemma.rest.util.args.*; import javax.ws.rs.BadRequestException; import javax.ws.rs.NotFoundException; +import javax.ws.rs.core.Response; import java.util.Collection; import java.util.Collections; import java.util.stream.Collectors; @@ -41,10 +52,11 @@ @ActiveProfiles("web") @WebAppConfiguration @ContextConfiguration -public class SearchWebServiceTest extends AbstractJUnit4SpringContextTests { +public class SearchWebServiceTest extends BaseJerseyTest { @Configuration @TestComponent + @Import(JacksonConfig.class) public static class SearchWebServiceTestContextConfiguration { @Bean @@ -76,6 +88,16 @@ public TaxonArgService taxonArgService( TaxonService taxonService ) { public PlatformArgService platformArgService( ArrayDesignService arrayDesignService ) { return new PlatformArgService( arrayDesignService, mock( ExpressionExperimentService.class ), mock( CompositeSequenceService.class ) ); } + + @Bean + public AnalyticsProvider analyticsProvider() { + return mock(); + } + + @Bean + public AccessDecisionManager accessDecisionManager() { + return mock(); + } } @Autowired @@ -91,7 +113,7 @@ public PlatformArgService platformArgService( ArrayDesignService arrayDesignServ private Gene gene; @Before - public void setUp() { + public void setUpMocks() { gene = new Gene(); gene.setId( 1L ); gene.setOfficialSymbol( "BRCA1" ); @@ -113,18 
+135,18 @@ public void tearDown() { public void testSearchEverything() throws SearchException { ArgumentCaptor searchSettingsArgumentCaptor = ArgumentCaptor.forClass( SearchSettings.class ); SearchService.SearchResultMap srm = mock( SearchService.SearchResultMap.class ); - when( srm.toList() ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, "test object" ) ) ); + when( srm.toList() ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, null, "test object" ) ) ); when( searchService.search( searchSettingsArgumentCaptor.capture() ) ).thenReturn( srm ); when( searchService.loadValueObjects( any() ) ).thenAnswer( args -> { //noinspection unchecked Collection> searchResult = args.getArgument( 0, Collection.class ); return searchResult.stream() - .map( sr -> SearchResult.from( sr, new GeneValueObject( sr.getResultObject() ) ) ) + .map( sr -> sr.withResultObject( new GeneValueObject( sr.getResultObject() ) ) ) .collect( Collectors.toList() ); } ); when( searchService.getSupportedResultTypes() ).thenReturn( Collections.singleton( Gene.class ) ); - SearchWebService.SearchResultsResponseDataObject searchResults = searchWebService.search( "BRCA1", null, null, null, LimitArg.valueOf( "20" ), null ); + SearchWebService.SearchResultsResponseDataObject searchResults = searchWebService.search( QueryArg.valueOf( "BRCA1" ), null, null, null, LimitArg.valueOf( "20" ), null ); assertThat( searchSettingsArgumentCaptor.getValue() ) .hasFieldOrPropertyWithValue( "query", "BRCA1" ) @@ -147,38 +169,36 @@ public void testSearchEverything() throws SearchException { @Test public void testSearchByTaxon() throws SearchException { SearchService.SearchResultMap srm = mock( SearchService.SearchResultMap.class ); - when( srm.getByResultObjectType( Gene.class ) ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, "test object" ) ) ); + when( srm.getByResultObjectType( Gene.class ) ).thenReturn( 
Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, null, "test object" ) ) ); when( searchService.search( any() ) ).thenReturn( srm ); when( searchService.loadValueObject( any() ) ).thenAnswer( args -> { //noinspection unchecked SearchResult searchResult = args.getArgument( 0, SearchResult.class ); - SearchResult sr = SearchResult.from( searchResult.getResultType(), searchResult.getResultId(), searchResult.getScore(), "test object" ); - searchResult.setHighlights( searchResult.getHighlights() ); + SearchResult sr = SearchResult.from( searchResult.getResultType(), searchResult.getResultId(), searchResult.getScore(), searchResult.getHighlights(), "test object" ); if ( searchResult.getResultObject() != null ) { sr.setResultObject( new GeneValueObject( searchResult.getResultObject() ) ); } return sr; } ); - searchWebService.search( "BRCA1", TaxonArg.valueOf( "9606" ), null, null, LimitArg.valueOf( "20" ), null ); + searchWebService.search( QueryArg.valueOf( "BRCA1" ), TaxonArg.valueOf( "9606" ), null, null, LimitArg.valueOf( "20" ), null ); verify( taxonService ).findByNcbiId( 9606 ); } @Test public void testSearchByArrayDesign() throws SearchException { SearchService.SearchResultMap srm = mock( SearchService.SearchResultMap.class ); - when( srm.getByResultObjectType( Gene.class ) ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, "test object" ) ) ); + when( srm.getByResultObjectType( Gene.class ) ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, null, "test object" ) ) ); when( searchService.search( any() ) ).thenReturn( srm ); when( searchService.loadValueObject( any() ) ).thenAnswer( args -> { //noinspection unchecked SearchResult searchResult = args.getArgument( 0, SearchResult.class ); - SearchResult sr = SearchResult.from( searchResult.getResultType(), searchResult.getResultId(), searchResult.getScore(), "test object" ); - sr.setHighlights( searchResult.getHighlights() ); + 
SearchResult sr = SearchResult.from( searchResult.getResultType(), searchResult.getResultId(), searchResult.getScore(), searchResult.getHighlights(), "test object" ); if ( searchResult.getResultObject() != null ) { sr.setResultObject( new GeneValueObject( searchResult.getResultObject() ) ); } return sr; } ); - searchWebService.search( "BRCA1", null, PlatformArg.valueOf( "1" ), null, LimitArg.valueOf( "20" ), null ); + searchWebService.search( QueryArg.valueOf( "BRCA1" ), null, PlatformArg.valueOf( "1" ), null, LimitArg.valueOf( "20" ), null ); verify( arrayDesignService ).load( 1L ); } @@ -189,21 +209,39 @@ public void testSearchWhenQueryIsMissing() { @Test(expected = BadRequestException.class) public void testSearchWhenQueryIsEmpty() { - searchWebService.search( null, null, null, null, LimitArg.valueOf( "20" ), null ); + QueryArg.valueOf( "" ); } @Test(expected = NotFoundException.class) public void testSearchWhenUnknownTaxonIsProvided() { - searchWebService.search( "brain", TaxonArg.valueOf( "9607" ), null, null, LimitArg.valueOf( "20" ), null ); + searchWebService.search( QueryArg.valueOf( "brain" ), TaxonArg.valueOf( "9607" ), null, null, LimitArg.valueOf( "20" ), null ); } @Test(expected = NotFoundException.class) public void testSearchWhenUnknownPlatformIsProvided() { - searchWebService.search( "brain", null, PlatformArg.valueOf( "2" ), null, LimitArg.valueOf( "20" ), null ); + searchWebService.search( QueryArg.valueOf( "brain" ), null, PlatformArg.valueOf( "2" ), null, LimitArg.valueOf( "20" ), null ); } @Test(expected = BadRequestException.class) public void testSearchWhenUnsupportedResultTypeIsProvided() { - searchWebService.search( "brain", null, null, Collections.singletonList( "ubic.gemma.model.expression.designElement.CompositeSequence2" ), LimitArg.valueOf( "20" ), null ); + searchWebService.search( QueryArg.valueOf( "brain" ), null, null, Collections.singletonList( "ubic.gemma.model.expression.designElement.CompositeSequence2" ), LimitArg.valueOf( 
"20" ), null ); + } + + @Test + public void testSearchWithInvalidQuery() throws SearchException { + when( searchService.search( any() ) ).thenAnswer( a -> { + try { + new QueryParser( Version.LUCENE_36, "", new PassThroughAnalyzer( Version.LUCENE_36 ) ) + .parse( a.getArgument( 0, SearchSettings.class ).getQuery() ); + } catch ( ParseException e ) { + throw new LuceneParseSearchException( "\"", e.getMessage(), e ); + } + return mock(); + } ); + Assertions.assertThat( target( "/search" ).queryParam( "query", "\"" ).request().get() ) + .hasStatus( Response.Status.BAD_REQUEST ) + .entity() + .hasFieldOrPropertyWithValue( "error.code", 400 ) + .hasFieldOrPropertyWithValue( "error.message", "Cannot parse '\"': Lexical error at line 1, column 2. Encountered: after : \"\"" ); } } \ No newline at end of file diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/util/ResponseAssert.java b/gemma-rest/src/test/java/ubic/gemma/rest/util/ResponseAssert.java index 33fa22ab44..52fe654d19 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/util/ResponseAssert.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/util/ResponseAssert.java @@ -100,6 +100,10 @@ public ObjectAssert entityAs( Class clazz ) { } } + public StringAssert entityAsString() { + return new StringAssert( actual.readEntity( String.class ) ); + } + public InputStreamAssert entityAsStream() { return new InputStreamAssert( actual.readEntity( InputStream.class ) ); } diff --git a/gemma-web/pom.xml b/gemma-web/pom.xml index c04e695de2..a5ce47a550 100644 --- a/gemma-web/pom.xml +++ b/gemma-web/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.2 + 1.31.3 4.0.0 gemma-web diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/BaseController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/BaseController.java index b088a24bbe..98e33806e8 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/BaseController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/BaseController.java @@ -1,8 +1,8 @@ /* * 
The Gemma project - * + * * Copyright (c) 2006 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -116,11 +116,7 @@ protected void sendConfirmationEmail( HttpServletRequest request, String token, model.put( "confirmLink", Settings.getHostUrl() + servletContext.getContextPath() + "/confirmRegistration.html?key=" + token + "&username=" + username ); - SimpleMailMessage mailMessage = new SimpleMailMessage(); - mailMessage.setFrom( Settings.getAdminEmailAddress() ); - mailMessage.setSubject( getText( "signup.email.subject", request.getLocale() ) ); - mailMessage.setTo( username + "<" + email + ">" ); - mailEngine.sendMessage( mailMessage, templateName, model ); + mailEngine.sendMessage( username + "<" + email + ">", getText( "signup.email.subject", request.getLocale() ), templateName, model ); } catch ( Exception e ) { log.error( "Couldn't send email to " + email, e ); diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/BaseFormController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/BaseFormController.java index 180cccef7d..7e9bfc38db 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/BaseFormController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/BaseFormController.java @@ -23,7 +23,6 @@ import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.propertyeditors.CustomNumberEditor; -import org.springframework.mail.SimpleMailMessage; import org.springframework.validation.BindException; import org.springframework.validation.ObjectError; import org.springframework.web.bind.WebDataBinder; @@ -31,7 +30,6 @@ import org.springframework.web.multipart.support.ByteArrayMultipartFileEditor; import org.springframework.web.servlet.ModelAndView; import 
org.springframework.web.servlet.mvc.SimpleFormController; -import ubic.gemma.model.common.auditAndSecurity.User; import ubic.gemma.persistence.util.MailEngine; import ubic.gemma.web.util.MessageUtil; @@ -40,7 +38,6 @@ import javax.servlet.http.HttpSession; import java.text.NumberFormat; import java.util.Locale; -import java.util.Map; /** * Implementation of SimpleFormController that contains convenience methods for subclasses. For @@ -177,31 +174,4 @@ protected ModelAndView processFormSubmission( HttpServletRequest request, HttpSe return super.processFormSubmission( request, response, command, errors ); } - - /** - * Convenience message to send messages to users - */ - protected void sendEmail( User user, String msg ) { - if ( StringUtils.isBlank( user.getEmail() ) ) { - log.warn( "Could not send email to " + user + ", no email address" ); - } - log.debug( "sending e-mail to user [" + user.getEmail() + "]..." ); - SimpleMailMessage message = new SimpleMailMessage(); - message.setTo( user.getFullName() + "<" + user.getEmail() + ">" ); - - mailEngine.send( message ); - } - - /** - * Convenience message to send messages to users - */ - protected void sendEmail( User user, String templateName, Map model ) { - if ( StringUtils.isBlank( user.getEmail() ) ) { - log.warn( "Could not send email to " + user + ", no email address" ); - } - SimpleMailMessage message = new SimpleMailMessage(); - message.setTo( user.getFullName() + "<" + user.getEmail() + ">" ); - mailEngine.sendMessage( message, templateName, model ); - } - } \ No newline at end of file diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java b/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java index ef76b61561..ccae6778a7 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java @@ -60,10 +60,12 @@ import 
javax.annotation.ParametersAreNonnullByDefault; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import java.net.URI; import java.util.*; import java.util.stream.Collectors; import static org.apache.commons.text.StringEscapeUtils.escapeHtml4; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.prepareTermUriQuery; /** * Note: do not use parametrized collections as parameters for ajax methods in this class! Type information is lost @@ -202,12 +204,12 @@ public Map highlightTerm( @Nullable String uri, String value, St } @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { if ( highlightedDocuments >= MAX_HIGHLIGHTED_DOCUMENTS ) { return Collections.emptyMap(); } highlightedDocuments++; - return super.highlightDocument( document, highlighter, analyzer, fields ) + return super.highlightDocument( document, highlighter, analyzer ) .entrySet().stream() .collect( Collectors.toMap( e -> localizeField( StringUtils.substringAfterLast( document.get( "_hibernate_class" ), '.' ), e.getKey() ), Map.Entry::getValue, ( a, b ) -> b ) ); } @@ -225,13 +227,18 @@ public ModelAndView doSearch( HttpServletRequest request, HttpServletResponse re ModelAndView mav = new ModelAndView( "generalSearch" ); - if ( !this.searchStringValidator( command.getQuery() ) && StringUtils.isBlank( command.getTermUri() ) ) { + if ( !searchStringValidator( command.getQuery() ) ) { throw new IllegalArgumentException( "Invalid query" ); } // Need this for the bookmarkable links mav.addObject( "SearchString", command.getQuery() ); - mav.addObject( "SearchURI", command.getTermUri() ); + try { + URI termUri = prepareTermUriQuery( command ); + mav.addObject( "SearchURI", termUri != null ? 
termUri.toString() : null ); + } catch ( SearchException e ) { + mav.addObject( "SearchURI", null ); + } if ( ( command.getTaxon() != null ) && ( command.getTaxon().getId() != null ) ) mav.addObject( "searchTaxon", command.getTaxon().getScientificName() ); diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityControllerImpl.java b/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityControllerImpl.java index 9889b3eb02..967fbfa4db 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityControllerImpl.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityControllerImpl.java @@ -27,7 +27,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.mail.SimpleMailMessage; import org.springframework.security.access.AccessDeniedException; import org.springframework.security.acls.model.Sid; import org.springframework.security.core.userdetails.UserDetails; @@ -118,18 +117,12 @@ public boolean addUserToGroup( String userName, String groupName ) { String emailAddress = u.getEmail(); if ( StringUtils.isNotBlank( emailAddress ) ) { SecurityControllerImpl.log.debug( "Sending email notification to " + emailAddress ); - SimpleMailMessage msg = new SimpleMailMessage(); - msg.setTo( emailAddress ); - msg.setFrom( Settings.getAdminEmailAddress() ); - msg.setSubject( "You have been added to a group on Gemma" ); - String manageGroupsUrl = Settings.getHostUrl() + servletContext.getContextPath() + "/manageGroups.html"; - msg.setText( userTakingAction.getUserName() + " has added you to the group '" + groupName + String body = userTakingAction.getUserName() + " has added you to the group '" + groupName + "'.\nTo view groups you belong to, visit " + manageGroupsUrl - + "\n\nIf you believe you received this email in error, 
contact " + Settings.getAdminEmailAddress() - + "." ); - - mailEngine.send( msg ); + + "\n\nIf you believe you received this email in error, contact " + mailEngine.getAdminEmailAddress() + + "."; + mailEngine.sendMessage( emailAddress, "You have been added to a group on Gemma", body ); } return true; diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/arrayDesign/ArrayDesignControllerImpl.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/arrayDesign/ArrayDesignControllerImpl.java index f3a11edaa7..bb1c75ace9 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/arrayDesign/ArrayDesignControllerImpl.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/arrayDesign/ArrayDesignControllerImpl.java @@ -57,6 +57,7 @@ import ubic.gemma.persistence.util.EntityUtils; import ubic.gemma.persistence.util.Filter; import ubic.gemma.persistence.util.Filters; +import ubic.gemma.persistence.util.Settings; import ubic.gemma.web.remote.EntityDelegator; import ubic.gemma.web.remote.JsonReaderResponse; import ubic.gemma.web.remote.ListBatchCommand; @@ -78,7 +79,7 @@ @RequestMapping("/arrays") public class ArrayDesignControllerImpl implements ArrayDesignController { - private static final String SUPPORT_EMAIL = "pavlab-support@msl.ubc.ca"; // FIXME factor out as config + private static final String SUPPORT_EMAIL = Settings.getString( "gemma.support.email" ); private static final Log log = LogFactory.getLog( ArrayDesignControllerImpl.class.getName() ); diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java index 0084168412..2edb2543dd 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java @@ -28,6 
+28,7 @@ import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.job.executor.webapp.TaskRunningService; import ubic.gemma.core.ontology.OntologyService; +import ubic.gemma.core.search.ParseSearchException; import ubic.gemma.core.search.SearchException; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.description.CharacteristicValueObject; @@ -40,6 +41,7 @@ import ubic.gemma.persistence.service.genome.taxon.TaxonService; import ubic.gemma.web.util.EntityNotFoundException; +import javax.ws.rs.InternalServerErrorException; import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -102,7 +104,8 @@ public void createExperimentTag( Characteristic vc, Long id ) { if ( vc == null ) { throw new IllegalArgumentException( "Null characteristic" ); } - if ( ontologyService.isObsolete( vc.getValueUri() ) ) { + OntologyTerm term = ontologyService.getTerm( vc.getValueUri() ); + if ( vc.getValueUri() != null && term != null && term.isObsolete() ) { throw new IllegalArgumentException( vc + " is an obsolete term! Not saving." ); } expressionExperimentService.addCharacteristic( ee, vc ); @@ -131,15 +134,17 @@ public Collection findTerm( String givenQueryString, int numfilled = 0; int maxfilled = 25; // presuming we don't need to look too far down the list ... just as a start. for ( CharacteristicValueObject cvo : sortedResults ) { - cvo.setValueDefinition( ontologyService.getDefinition( cvo.getValueUri() ) ); + cvo.setValueDefinition( cvo.getValueUri() != null ? 
ontologyService.getDefinition( cvo.getValueUri() ) : null ); if ( ++numfilled > maxfilled ) { break; } } return sortedResults; + } catch ( ParseSearchException e ) { + throw new IllegalArgumentException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new IllegalArgumentException( "Invalid search query.", e ); + throw new InternalServerErrorException( e ); } } diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/DEDVController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/DEDVController.java index ccf62ba1cc..2d0b57bf86 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/DEDVController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/DEDVController.java @@ -479,9 +479,11 @@ public VisualizationValueObject[] getDEDVForVisualization( Collection eeId Collection dedvs; if ( geneIds == null || geneIds.isEmpty() ) { - dedvs = processedExpressionDataVectorService.getProcessedDataArrays( ees.iterator().next(), SAMPLE_SIZE ); + dedvs = processedExpressionDataVectorService.getProcessedDataArrays( ees.iterator().next(), SAMPLE_SIZE ); + if ( dedvs.size() > SAMPLE_SIZE ) { + dedvs = new ArrayList<>( dedvs ).subList( 0, SAMPLE_SIZE ); + } } else { - if ( geneIds.size() > MAX_RESULTS_TO_RETURN ) { log.warn( geneIds.size() + " genes for visualization. Too many. Only using first " + MAX_RESULTS_TO_RETURN + " genes. " ); @@ -512,9 +514,9 @@ public VisualizationValueObject[] getDEDVForVisualization( Collection eeId time = watch.getTime(); watch.reset(); watch.start(); - if ( time > 100 ) { + if ( time > 500 ) { log.info( "Ran sortVectorDataByDesign on " + dedvs.size() + " DEDVs for " + eeIds.size() + " EEs" + " in " - + time + " ms (times <100ms not reported)." ); + + time + " ms (times <500ms not reported)." 
); } watch.stop(); @@ -1235,7 +1237,7 @@ private VisualizationValueObject[] makeVisCollection( Collection 1000 ) { - log.info( "Created vis value objects in: " + time ); + log.info( "Created " + result.length + " vis value objects in: " + time ); } return result; diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java index d7d59648ad..e71d45f05d 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java @@ -637,7 +637,7 @@ public void updateBioMaterials( BioMaterialValueObject[] bmvos ) { Collection biomaterials = bioMaterialService.updateBioMaterials( Arrays.asList( bmvos ) ); - log.info( String.format( "Updating biomaterials took %.2f seconds", w.getTime() / 1000 ) ); + log.info( String.format( "Updating biomaterials took %.2f seconds", (double)w.getTime() / 1000.0 ) ); if ( biomaterials.isEmpty() ) return; @@ -773,7 +773,13 @@ public void updateFactorValueCharacteristics( FactorValueValueObject[] fvvos ) { Long charId = fvvo.getCharId(); // this is optional. 
Maybe we're actually adding a characteristic for the Statement c; if ( charId != null ) { - c = fv.getCharacteristics().stream().filter( s -> s.getId().equals( charId ) ).findFirst().orElseThrow( () -> new EntityNotFoundException( String.format( "No characteristic with ID %d in FactorValue with ID %d", charId, fvvo.getId() ) ) ); + c = fv.getCharacteristics().stream() + .filter( s -> s.getId().equals( charId ) ) + .findFirst() + .orElseThrow( () -> new EntityNotFoundException( String.format( "No characteristic with ID %d in FactorValue with ID %d", charId, fvvo.getId() ) ) ); + // updating the statement can alter its hashCode() and thus breaking the Set contract, we have to remove + // it and add it back before saving + fv.getCharacteristics().remove( c ); } else { c = Statement.Factory.newInstance(); } @@ -815,6 +821,10 @@ public void updateFactorValueCharacteristics( FactorValueValueObject[] fvvos ) { c.setSecondObjectUri( null ); } + if ( charId != null ) { + fv.getCharacteristics().add( c ); + } + fvs[i] = fv; statements[i] = c; } diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java index f253745fb5..2d89fc2d44 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java @@ -474,7 +474,7 @@ public Map loadCountsForDataSummaryTable() { countTimer.start(); long bioMaterialCount = expressionExperimentService.countBioMaterials( null ); long arrayDesignCount = arrayDesignService.countWithCache( null ); - long expressionExperimentCount = expressionExperimentService.countWithCache( null ); + long expressionExperimentCount = expressionExperimentService.countWithCache( null, null ); Map eesPerTaxon = 
expressionExperimentService.getPerTaxonCount(); countTimer.stop(); diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/genome/gene/GeneSetController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/genome/gene/GeneSetController.java index 5f144c029d..294ac7dc3f 100755 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/genome/gene/GeneSetController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/genome/gene/GeneSetController.java @@ -29,6 +29,7 @@ import org.springframework.web.servlet.ModelAndView; import ubic.gemma.core.genome.gene.SessionBoundGeneSetValueObject; import ubic.gemma.core.genome.gene.service.GeneSetService; +import ubic.gemma.core.search.ParseSearchException; import ubic.gemma.core.search.SearchException; import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.model.genome.gene.DatabaseBackedGeneSetValueObject; @@ -38,6 +39,7 @@ import ubic.gemma.web.util.EntityNotFoundException; import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.InternalServerErrorException; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; @@ -190,8 +192,10 @@ public Collection findGeneSetsByGene( Long geneId ) { public Collection findGeneSetsByName( String query, Long taxonId ) { try { return geneSetService.findGeneSetsByName( query, taxonId ); + } catch ( ParseSearchException e ) { + throw new IllegalArgumentException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new IllegalArgumentException( "Invalid search query.", e ); + throw new InternalServerErrorException( e ); } } diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/visualization/VisualizationValueObject.java b/gemma-web/src/main/java/ubic/gemma/web/controller/visualization/VisualizationValueObject.java index 36367055f5..a5c3406c81 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/visualization/VisualizationValueObject.java +++ 
b/gemma-web/src/main/java/ubic/gemma/web/controller/visualization/VisualizationValueObject.java @@ -176,8 +176,8 @@ public VisualizationValueObject( Collection vectors, Li GeneExpressionProfile profile = new GeneExpressionProfile( vector, vectorGenes, color, valid, vector.getPvalue() ); - if ( !profile.isAllMissing() ) - profiles.add( profile ); + // if ( !profile.isAllMissing() ) // this might not be a desirable side-effect. + profiles.add( profile ); } } diff --git a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml index 094e1d6eb5..7949b18295 100644 --- a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml +++ b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml @@ -26,6 +26,8 @@ + + @@ -33,14 +35,6 @@ - - - - - - + + + + + + + + ubic.gemma.model.expression.experiment.ExpressionExperiment + + + + + + + + <%--
--%> -
+
diff --git a/gemma-web/src/main/webapp/scripts/api/entities/experiment/ExpressionExperimentTools.js b/gemma-web/src/main/webapp/scripts/api/entities/experiment/ExpressionExperimentTools.js index f26c4e8252..893bd2e7d9 100755 --- a/gemma-web/src/main/webapp/scripts/api/entities/experiment/ExpressionExperimentTools.js +++ b/gemma-web/src/main/webapp/scripts/api/entities/experiment/ExpressionExperimentTools.js @@ -1,4 +1,4 @@ -Ext.namespace('Gemma'); +Ext.namespace( 'Gemma' ); Ext.BLANK_IMAGE_URL = ctxBasePath + '/images/default/s.gif'; /** @@ -11,938 +11,938 @@ Ext.BLANK_IMAGE_URL = ctxBasePath + '/images/default/s.gif'; * @extends Gemma.CurationTools * */ -Gemma.ExpressionExperimentTools = Ext.extend(Gemma.CurationTools, { - - allowScoreOverride: false, - experimentDetails: null, - tbar: new Ext.Toolbar(), - bconfFolded: true, - beffFolded: true, - qualFolded: true, - suitFolded: true, - - /** - * @memberOf Gemma.ExpressionExperimentTools - */ - initComponent: function () { - this.curatable = this.experimentDetails; - this.auditable = { - id: this.experimentDetails.id, - classDelegatingFor: "ubic.gemma.model.expression.experiment.ExpressionExperiment" - }; - Gemma.ExpressionExperimentTools.superclass.initComponent.call(this); - var manager = new Gemma.EEManager({ - editable: this.editable - }); - manager.on('reportUpdated', function () { - this.fireEvent('reloadNeeded'); - }, this); - - var self = this; - - var eeRow = new Ext.Panel({ - cls: 'ee-tool-row', - defaults: { - width: '100%', - border: false, - padding: 2 - } - }); - - eeRow.add({ - html: '
' - }); - - var refreshButton = new Ext.Button({ - text: '', - cls: 'btn-refresh nobreak', - tooltip: 'Refresh preprocessing statistics', - handler: function () { - manager.updateEEReport(this.experimentDetails.id); - }, - scope: this - }); - - var leftPanel = new Ext.Panel({ - cls: 'ee-tool-left', - defaults: { - border: false, - padding: 2 - } - }); +Gemma.ExpressionExperimentTools = Ext.extend( Gemma.CurationTools, { + + allowScoreOverride : false, + experimentDetails : null, + tbar : new Ext.Toolbar(), + bconfFolded : true, + beffFolded : true, + qualFolded : true, + suitFolded : true, + + /** + * @memberOf Gemma.ExpressionExperimentTools + */ + initComponent : function() { + this.curatable = this.experimentDetails; + this.auditable = { + id : this.experimentDetails.id, + classDelegatingFor : "ubic.gemma.model.expression.experiment.ExpressionExperiment" + }; + Gemma.ExpressionExperimentTools.superclass.initComponent.call( this ); + var manager = new Gemma.EEManager( { + editable : this.editable + } ); + manager.on( 'reportUpdated', function() { + this.fireEvent( 'reloadNeeded' ); + }, this ); + + var self = this; + + var eeRow = new Ext.Panel( { + cls : 'ee-tool-row', + defaults : { + width : '100%', + border : false, + padding : 2 + } + } ); + + eeRow.add( { + html : '
' + } ); + + var refreshButton = new Ext.Button( { + text : '', + cls : 'btn-refresh nobreak', + tooltip : 'Refresh preprocessing statistics', + handler : function() { + manager.updateEEReport( this.experimentDetails.id ); + }, + scope : this + } ); + + var leftPanel = new Ext.Panel( { + cls : 'ee-tool-left', + defaults : { + border : false, + padding : 2 + } + } ); + + leftPanel.add( {cls : 'nobreak', html : '

Preprocessing:

'} ); + leftPanel.add( refreshButton ); + + /* This does all preprocessing */ + leftPanel.add( this.processedVectorCreatePanelRenderer( this.experimentDetails, manager ) ); + + /* This is no longer needed as a separate step */ + // leftPanel.add(this.missingValueAnalysisPanelRenderer(this.experimentDetails, manager)); + + leftPanel.add( this.diagnosticsPanelRenderer( this.experimentDetails, manager ) ); + leftPanel.add( this.batchPanelRenderer( this.experimentDetails, manager ) ); + + // var batchInfoMissingPanel = this.batchInfoMissingRenderer(this.experimentDetails, manager); + var batchConfoundPanel = this.batchConfoundRenderer( this.experimentDetails, manager ); + var batchEffectPanel = this.batchEffectRenderer( this.experimentDetails, manager ); + if ( batchConfoundPanel !== null || batchEffectPanel !== null /*|| batchInfoMissingPanel !== null*/ ) { + leftPanel.add( {html : "

Batch info quality:

"} ); + // if (batchInfoMissingPanel !== null) leftPanel.add(batchInfoMissingPanel); + if ( batchConfoundPanel !== null ) leftPanel.add( batchConfoundPanel ); + if ( batchEffectPanel !== null ) leftPanel.add( batchEffectPanel ); + } + + leftPanel.add( {html : "

Analyses:

"} ); + leftPanel.add( this.differentialAnalysisPanelRenderer( this.experimentDetails, manager ) ); - leftPanel.add({cls: 'nobreak', html: '

Preprocessing:

'}); - leftPanel.add(refreshButton); - - /* This does all preprocessing */ - leftPanel.add(this.processedVectorCreatePanelRenderer(this.experimentDetails, manager)); - - /* This is no longer needed as a separate step */ - // leftPanel.add(this.missingValueAnalysisPanelRenderer(this.experimentDetails, manager)); - - leftPanel.add(this.diagnosticsPanelRenderer(this.experimentDetails, manager)); - leftPanel.add(this.batchPanelRenderer(this.experimentDetails, manager)); - - // var batchInfoMissingPanel = this.batchInfoMissingRenderer(this.experimentDetails, manager); - var batchConfoundPanel = this.batchConfoundRenderer(this.experimentDetails, manager); - var batchEffectPanel = this.batchEffectRenderer(this.experimentDetails, manager); - if (batchConfoundPanel !== null || batchEffectPanel !== null /*|| batchInfoMissingPanel !== null*/) { - leftPanel.add({html: "

Batch info quality:

"}); - // if (batchInfoMissingPanel !== null) leftPanel.add(batchInfoMissingPanel); - if (batchConfoundPanel !== null) leftPanel.add(batchConfoundPanel); - if (batchEffectPanel !== null) leftPanel.add(batchEffectPanel); - } - - leftPanel.add({html: "

Analyses:

"}); - leftPanel.add(this.differentialAnalysisPanelRenderer(this.experimentDetails, manager)); - // leftPanel.add(this.linkAnalysisPanelRenderer(this.experimentDetails, manager)); - eeRow.add(leftPanel); - - var rightPanel = new Ext.Panel({ - cls: 'ee-tool-right', - defaults: { - border: false, - padding: 2 - } - }); - - if (this.experimentDetails.geeq) { - if (this.experimentDetails.geeq.otherIssues && this.experimentDetails.geeq.otherIssues.trim()) { - rightPanel.add({ - html: - "
" + - "" + - "" + - "

There were some issues while scoring this experiment:

" + - "
" + this.experimentDetails.geeq.otherIssues + "
" + - "
" + - "
" + - "
" - }) - } - rightPanel.add(this.qualityRenderer(this.experimentDetails, manager)); - rightPanel.add(this.suitabilityRenderer(this.experimentDetails, manager)); - } else { - rightPanel.add({ - html: - '

Quality / Suitability

' + - '
Quality and Suitability not calculated for this experiment
' - }) - } - - var gqRecalcButton = new Ext.Button({ - text: "Recalculate score and refresh page (takes a minute)", - tooltip: + eeRow.add( leftPanel ); + + var rightPanel = new Ext.Panel( { + cls : 'ee-tool-right', + defaults : { + border : false, + padding : 2 + } + } ); + + if ( this.experimentDetails.geeq ) { + if ( this.experimentDetails.geeq.otherIssues && this.experimentDetails.geeq.otherIssues.trim() ) { + rightPanel.add( { + html : + "
" + + "" + + "" + + "

There were some issues while scoring this experiment:

" + + "
" + this.experimentDetails.geeq.otherIssues + "
" + + "
" + + "
" + + "
" + } ) + } + rightPanel.add( this.qualityRenderer( this.experimentDetails, manager ) ); + rightPanel.add( this.suitabilityRenderer( this.experimentDetails, manager ) ); + } else { + rightPanel.add( { + html : + '

Quality / Suitability

' + + '
Quality and Suitability not calculated for this experiment
' + } ) + } + + var gqRecalcButton = new Ext.Button( { + text : "Recalculate score and refresh page (takes a minute)", + tooltip : 'Runs full scoring. This usually takes around 1 minute to complete, but can take up to several minutes for large experiments.\n' + 'Page will refresh after this task has been finished', - cls: 'gq-btn btn-refresh gq-btn-recalc-all', - handler: function (b, e) { - b.setText("Recalculate score and refresh page (takes a minute)"); - b.setDisabled(true); - ExpressionExperimentController.runGeeq(self.experimentDetails.id, "all", { - callback: function () { - window.location.reload(); - } - }); - }, - scope: this - }); - - var recalcButtonWrap = new Ext.Panel({ - cls: 'extjs-sucks', - defaults: { - border: false, - padding: 0 - } - }); - - recalcButtonWrap.add(gqRecalcButton); - rightPanel.add(recalcButtonWrap); - - eeRow.add(rightPanel); - - this.add(eeRow); - }, - - suitabilityRenderer: function (ee, mgr) { - var panel = new Ext.Panel({ - defaults: { - border: false, - padding: 0 - }, - items: [{ - html: '

Suitability

' - }] - }); - - var sHead = new Ext.Panel({ - cls: 'gq-head', - defaults: { - border: false, - padding: 0 - } - }); - - var suitExtra = this.suitExtraRendeder(ee); - sHead.add(this.geeqRowRenderer("Public suitability score", ee.geeq.publicSuitabilityScore, - "This is the suitability score that is currently publicly displayed.", "", 2, null, suitExtra, true)); - if (this.allowScoreOverride) sHead.add(suitExtra); - this.allowSuitInput(ee.geeq.manualSuitabilityOverride); - - - panel.add(sHead); - - var sBody = new Ext.Panel({ - cls: 'gq-body', - defaults: { - border: false, - padding: 0 - } - }); - - var detailsButtonWrap = new Ext.Panel({ - cls: 'extjs-sucks', - defaults: { - border: false, - padding: 0 - } - }); - - var detailsButton = this.detailsButtonRenderer(sBody); - - detailsButtonWrap.add(detailsButton); - panel.add(detailsButtonWrap); - - var sPubDesc = - Number(ee.geeq.sScorePublication) === -1 ? "Experiment has no publication, try filling it in." : - "Experiment does have a publication filled in properly."; - - var sPlatfAmntDesc = - Number(ee.geeq.sScorePlatformAmount) === -1 ? "Experiment is on more than 2 platforms. Consider splitting the experiment." : - Number(ee.geeq.sScorePlatformAmount) === -0.5 ? "Experiment has 2 platforms. Consider splitting the experiment." : - "Experiment is on a single platform."; - - var sPlatfTechDesc = - Number(ee.geeq.sScorePlatformsTechMulti) === -1 ? "Experiment has two or more platforms that use different technologies. Experiment should be split." : "" + - "All used platforms use the same technology."; - - var sPlatfPopDesc = - Number(ee.geeq.sScoreAvgPlatformPopularity) === -1 ? "Platform(s) used (on average) by less than 10 experiments." : - Number(ee.geeq.sScoreAvgPlatformPopularity) === -0.5 ? "Platform(s) used (on average) by less than 20 experiments." : - Number(ee.geeq.sScoreAvgPlatformPopularity) === 0.0 ? "Platform(s) used (on average) by less than 50 experiments." 
: - Number(ee.geeq.sScoreAvgPlatformPopularity) === 0.5 ? "Platform(s) used (on average) by less than 100 experiments." : - "Platform(s) used (on average) by at least 100 experiments."; - - var sPlatfSizeDesc = - Number(ee.geeq.sScoreAvgPlatformSize) === -1 ? "Platform has (or all platforms have on average) very low gene covrage." : - Number(ee.geeq.sScoreAvgPlatformSize) === -0.5 ? "Platform has (or all platforms have on average) low gene coverage." : - Number(ee.geeq.sScoreAvgPlatformSize) === 0.0 ? "Platform has (or all platforms have on average) moderate gene coverage." : - Number(ee.geeq.sScoreAvgPlatformSize) === 0.5 ? "Platform has (or all platforms have on average) good gene coverage." : - "Platform has (or all paltforms have on average) excellent gene coverage."; - - var sSizeDesc = - Number(ee.geeq.sScoreSampleSize) === -1 ? "The experiment has less than 6 samples or more than 500 samples" : - Number(ee.geeq.sScoreSampleSize) === -0.3 ? "The experiment has less than 10 samples." : - Number(ee.geeq.sScoreSampleSize) === 0.0 ? "The experiment has less than 20 samples." : "The experiment has at least 20 samples."; - - var sRawDesc = - Number(ee.geeq.sScoreRawData) === -1 ? "Experiment has no raw data available (data are from external source). Try obtaining the raw data." - : "We do have raw data available for this experiment."; - - var sMissErr = - ee.geeq.noVectors === true ? "Experiment has no computed vectors, run the vector computation!" : ""; - var sMissDesc = - ee.geeq.noVectors === true ? "There are no computed vectors." : - Number(ee.geeq.sScoreMissingValues) === -1 ? "Experiment has missing values. Try filling them in, ideally by obtaining raw data." 
: - "There are no missing values."; - - sBody.add(this.geeqRowRenderer('Publication', ee.geeq.sScorePublication, - "Checks whether the experiment has a publication.", sPubDesc)); - - sBody.add(this.geeqRowRenderer('Platforms used', ee.geeq.sScorePlatformAmount, - "The amount of platforms the experiment uses.", sPlatfAmntDesc)); - - sBody.add(this.geeqRowRenderer('Platforms tech consistency', ee.geeq.sScorePlatformsTechMulti, - "Punishes technology inconsistency of multi-platform experiments.", sPlatfTechDesc)); - - sBody.add(this.geeqRowRenderer('Platforms usage', ee.geeq.sScoreAvgPlatformPopularity, - "Depends on the popularity (experiments that use the platform) of the used platform. If there are multiple platforms," + - "the popularity is averaged.", sPlatfPopDesc)); - - sBody.add(this.geeqRowRenderer('Platforms size', ee.geeq.sScoreAvgPlatformSize, - "Depends on the size (the number of elements) of the used platform. If there are multiple platforms, the" + - "size is averaged.", sPlatfSizeDesc)); - - sBody.add(this.geeqRowRenderer('Sample size', ee.geeq.sScoreSampleSize, - "Depends on the experiments size (number of samples).", sSizeDesc)); - - sBody.add(this.geeqRowRenderer('Raw data state', ee.geeq.sScoreRawData, - "Checks whether there was raw data available for this experiment.", sRawDesc)); - - sBody.add(this.geeqRowRenderer('Missing values', ee.geeq.sScoreMissingValues, - "Checks whether the experiment has any missing values.", sMissDesc, 1, sMissErr)); - - panel.add(sBody); - if (!sMissErr) { - sBody.hide(); - } - return panel; - }, - - qualityRenderer: function (ee, mgr) { - var panel = new Ext.Panel({ - defaults: { - border: false, - padding: 0 - }, - items: [{ - html: '

Quality

' - }] - }); - - var qHead = new Ext.Panel({ - cls: 'gq-head', - defaults: { - border: false, - padding: 0 - } - }); - - var qualExtra = this.qualExtraRendeder(ee); - qHead.add(this.geeqRowRenderer("Public quality score", ee.geeq.publicQualityScore, - "This is the quality score that is currently publicly displayed.", "", 2, null, qualExtra, true)); - if (this.allowScoreOverride) qHead.add(qualExtra); - this.allowQualInput(ee.geeq.manualQualityOverride); - - panel.add(qHead); - - var qBody = new Ext.Panel({ - cls: 'gq-body', - defaults: { - border: false, - padding: 0 - } - }); - - var detailsButtonWrap = new Ext.Panel({ - cls: 'extjs-sucks', - defaults: { - border: false, - padding: 0 - } - }); - - var detailsButton = this.detailsButtonRenderer(qBody); - - detailsButtonWrap.add(detailsButton); - panel.add(detailsButtonWrap); - - var qOutlErr = - Number(ee.geeq.corrMatIssues) === 1 ? "The correlation matrix is empty!" : - Number(ee.geeq.corrMatIssues) === 2 ? "There are NaN values in the correlation matrix." : - ""; - - var qOutlierDesc = - Number(ee.geeq.qScoreOutliers) === -1 ? "There are detected, non-removed outliers. Removing detected outliers will improve the score." : - "No outliers were detected."; - - var qPlatfTechMultiDesc = - Number(ee.geeq.qScorePlatformsTech) === -1 ? "The experiment is on a two-color platform." : "" + - "The experiment is NOT on a two-color platform."; - - var qReplErr = - Number(ee.geeq.replicatesIssues) === 1 ? "There is no experimental design for this experiment" : - Number(ee.geeq.replicatesIssues) === 2 ? "There are no factor values" : - Number(ee.geeq.replicatesIssues) === 3 ? "All factor-value combinations have no replicates." : - Number(ee.geeq.replicatesIssues) === 4 ? "The lowest replicate amount was 0 - this should be impossible, please report" : - ""; - - // These thresholds are defined - var qReplDesc = - Number(ee.geeq.qScoreReplicates) === -1 ? "There is a factor-value combination that has very few or no replicates." 
: - Number(ee.geeq.qScoreReplicates) === 0.0 ? "There is a factor-value combination that has moderately few replicates. " : - "All factor-value combinations have a good number of replicates"; - - var qBatchInfoDesc = - Number(ee.geeq.qScoreBatchInfo) === -1 ? "The experiment has no batch info. Try filling it in." : "" + - "Batch information provided."; - - var qBatchEffErr = - Number(ee.geeq.qScoreBatchInfo) === -1 ? "There is no batch information" : - Number(ee.geeq.qScoreBatchEffect) === 0.0 && Number(ee.geeq.qScoreBatchConfound) < 1 ? "Batch confound detected, batch effect detection skipped." : - ee.geeq.batchCorrected === true ? "Data was batch-corrected." : ""; - - var qBatchEffDesc = - ee.geeq.manualBatchEffectActive === true ? "Manually set value, detected score was: " + ee.geeq.qScoreBatchEffect : - Number(ee.geeq.qScoreBatchInfo) === -1 ? "There were problems when checking for batch effect." : - Number(ee.geeq.qScoreBatchEffect) === -1 ? "Experiment has a strong batch effect: the batch p-value is less than 0.0001. Try to batch-correct." : - Number(ee.geeq.qScoreBatchEffect) === 0.0 && Number(ee.geeq.qScoreBatchConfound) < 1 ? "Batch effect score defaults to 0 when data is confounded with batches." : - Number(ee.geeq.qScoreBatchEffect) === 0.0 ? "The experiment has some batch effect: the batch p-value is within [0.1, 0.0001]. Try to batch-correct." : - "The experiment has no or very weak batch effect: the batch p-value is more than 0.1."; - - var qBatchConfErr = - Number(ee.geeq.qScoreBatchInfo) === -1 ? "There is no batch information" : - ""; - - var qBatchConfDesc = - ee.geeq.manualBatchConfoundActive === true ? "Manually set value, detected score was: " + ee.geeq.qScoreBatchConfound : - Number(ee.geeq.qScoreBatchConfound) === -1 ? "Batch confound has been detected." : - Number(ee.geeq.qScoreBatchConfound) === 0.0 ? "There were problems when checking for batch confound." 
: - "The experiment does not seem to be confounded with the batches."; - - var bconfExtra = this.bconfExtraRendeder(ee); - var beffExtra = this.beffExtraRendeder(ee); - - this.allowBconfRadios(ee.geeq.manualBatchConfoundActive); - this.allowBeffRadios(ee.geeq.manualBatchEffectActive); - - qBody.add(this.geeqRowRenderer('Mean sample corr.', ee.geeq.qScoreSampleMeanCorrelation, - "[Not included in final score] The actual mean correlation of samples.", "Not included in final score", 4, qOutlErr)); - - qBody.add(this.geeqRowRenderer('Sample corr. variance', ee.geeq.qScoreSampleCorrelationVariance, - "[Not included in final score] The actual variance of sample correlation.", "Not included in final score", 4, qOutlErr)); - - qBody.add(this.geeqRowRenderer('Median sample corr.', ee.geeq.qScoreSampleMedianCorrelation, - "The actual median correlation of samples.", "Included in the final score. Can be somewhat improved by removing outliers.", 4, qOutlErr)); - - qBody.add(this.geeqRowRenderer('Outliers', ee.geeq.qScoreOutliers, - "Depends on the presence of detected (non-removed) outliers. If there are any outliers, the score will be low.", qOutlierDesc, 1, qOutlErr)); - - qBody.add(this.geeqRowRenderer('Platform technology', ee.geeq.qScorePlatformsTech, - "Checks whether the experiments platform (any one, if there are multiple) is two-color.", qPlatfTechMultiDesc)); - - qBody.add(this.geeqRowRenderer('Replicates', ee.geeq.qScoreReplicates, - "Checks the replicate amount of all factor-value combinations, and takes the lowest one.", qReplDesc, 1, qReplErr)); - - qBody.add(this.geeqRowRenderer('Batch info', ee.geeq.qScoreBatchInfo, - "Checks whether the experiment has batch info available.", qBatchInfoDesc)); - - qBody.add(this.geeqRowRenderer('Batch confound', ee.geeq.qScorePublicBatchConfound, - "Checks whether the experimental data are confounded with batches. 
This value is the currently publicly displayed information.", - qBatchConfDesc, 1, qBatchConfErr, bconfExtra)); - qBody.add(bconfExtra); - - qBody.add(this.geeqRowRenderer('Batch effect', ee.geeq.qScorePublicBatchEffect, - "Checks the experimental data for a batch effect. This value is the currently publicly displayed information.", - qBatchEffDesc, 1, qBatchEffErr, beffExtra)); - qBody.add(beffExtra); - - panel.add(qBody); - if (!qReplErr && !qOutlErr && !qBatchConfErr) { - qBody.hide(); - } - return panel; - }, - - detailsButtonRenderer: function (panel) { - return new Ext.Button({ - text: ' Show score breakdown and details', - cls: 'gq-btn gq-btn-details', - handler: function () { - this.showPanel(panel, !panel.isVisible()) - }, - scope: this - }); - }, - - bconfExtraRendeder: function (ee) { - - this.bconfFolded = !ee.geeq.manualBatchConfoundActive; - - var bconfExtra = new Ext.Panel({ - cls: 'gq-extra' + (this.bconfFolded ? ' folded' : ''), - defaults: { - border: false, - padding: 0 - } - }); - - var self = this; - var foldButton = new Ext.Button({ - text: '', - cls: 'gq-btn', - handler: function () { - this.foldPanel(bconfExtra, self.bconfFolded = !self.bconfFolded); - }, - scope: this - }); - - bconfExtra.add(foldButton); - - bconfExtra.add(new Ext.Button({ - text: 'Re-score batch info', - tooltip: 'Run geeq only for the batch info related scores (refreshes page).', - handler: function (b, e) { - b.setText("Re-score batch info"); - b.setDisabled(true); - ExpressionExperimentController.runGeeq(self.experimentDetails.id, "batch", { - callback: function () { - window.location.reload(); - } - }); - }, - scope: this, - cls: 'btn-refresh gq-subscore-refresh-btn' - })); - - bconfExtra.add(new Ext.form.Checkbox({ - xtype: 'checkbox', - id: 'gq-bconf-override', - boxLabel: 'Override:', - hideLabel: false, - checked: ee.geeq.manualBatchConfoundActive, - handler: function (el, value) { - self.allowBconfRadios(value); - ee.geeq.manualBatchConfoundActive = value; - 
document.getElementById('bconf-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.bconfNotifySaved - }); - } - })); - - bconfExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 'gq-bconf-override-value-true', - name: 'gq-bconf-override-value', - boxLabel: 'Confounded', - hideLabel: false, - checked: ee.geeq.manualHasBatchConfound, - handler: function (el, value) { - ee.geeq.manualHasBatchConfound = value; - document.getElementById('bconf-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.bconfNotifySaved - }); - } - })); - - bconfExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 'gq-bconf-override-value-false', - name: 'gq-bconf-override-value', - boxLabel: 'Not confounded', - hideLabel: false, - checked: !ee.geeq.manualHasBatchConfound - })); - - bconfExtra.add({cls: 'gq-notif hidden', html: ''}); - - return bconfExtra; - }, - - bconfNotifySaved: function () { - var nr = document.getElementById('bconf-notification'); - if (nr) { - nr.setAttribute("hidden", "true"); - } - }, - - beffExtraRendeder: function (ee) { - - this.beffFolded = !ee.geeq.manualBatchEffectActive; - - var beffExtra = new Ext.Panel({ - cls: 'gq-extra' + (this.beffFolded ? 
' folded' : ''), - defaults: { - border: false, - padding: 0 - } - }); - - var self = this; - var foldButton = new Ext.Button({ - text: '', - cls: 'gq-btn', - handler: function () { - this.foldPanel(beffExtra, self.beffFolded = !self.beffFolded); - }, - scope: this - }); - - beffExtra.add(foldButton); - - beffExtra.add(new Ext.Button({ - text: 'Re-score batch info', - tooltip: 'Run geeq only for the batch info related scores (refreshes page).', - handler: function (b, e) { - b.setText("Re-score batch info"); - b.setDisabled(true); - ExpressionExperimentController.runGeeq(self.experimentDetails.id, "batch", { - callback: function () { - window.location.reload(); - } - }); - }, - scope: this, - cls: 'btn-refresh gq-subscore-refresh-btn' - })); - - beffExtra.add(new Ext.form.Checkbox({ - xtype: 'checkbox', - id: 'gq-beff-override', - boxLabel: 'Override:', - hideLabel: false, - checked: ee.geeq.manualBatchEffectActive, - handler: function (el, value) { - self.allowBeffRadios(value); - self.experimentDetails.geeq.manualBatchEffectActive = value; - document.getElementById('beff-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(self.experimentDetails.id, self.experimentDetails.geeq, { - callback: self.beffNotifySaved - }); - } - })); - - beffExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 'gq-beff-override-value-strong', - name: 'gq-beff-override-value', - boxLabel: 'Strong', - hideLabel: false, - checked: ee.geeq.manualHasStrongBatchEffect, - handler: function (el, value) { - if (!value) return; // since we have 3 radios, we wil only process the one that was selected - ee.geeq.manualHasStrongBatchEffect = value; - ee.geeq.manualHasNoBatchEffect = !value; - document.getElementById('beff-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.beffNotifySaved - }); - } - })); - - beffExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 
'gq-beff-override-value-weak', - name: 'gq-beff-override-value', - boxLabel: 'Weak', - hideLabel: false, - checked: !ee.geeq.manualHasStrongBatchEffect && !ee.geeq.manualHasNoBatchEffect, - handler: function (el, value) { - if (!value) return; // since we have 3 radios, we wil only process the one that was selected - ee.geeq.manualHasStrongBatchEffect = !value; - ee.geeq.manualHasNoBatchEffect = !value; - document.getElementById('beff-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.beffNotifySaved - }); - } - })); - - beffExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 'gq-beff-override-value-none', - name: 'gq-beff-override-value', - boxLabel: 'No batch effect', - hideLabel: false, - checked: ee.geeq.manualHasNoBatchEffect, - handler: function (el, value) { - if (!value) return; // since we have 3 radios, we wil only process the one that was selected - ee.geeq.manualHasStrongBatchEffect = !value; - ee.geeq.manualHasNoBatchEffect = value; - document.getElementById('beff-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.beffNotifySaved - }); - } - })); - - beffExtra.add({cls: 'gq-notif hidden', html: ''}); - - return beffExtra; - }, - - beffNotifySaved: function () { - var nr = document.getElementById('beff-notification'); - if (nr) { - nr.setAttribute("hidden", "true"); - } - }, - - qualExtraRendeder: function (ee) { - - this.qualFolded = !ee.geeq.manualQualityOverride; - - var qualExtra = new Ext.Panel({ - cls: 'gq-extra' + (this.qualFolded ? 
' folded' : ''), - defaults: { - border: false, - padding: 0 - } - }); - - var self = this; - var foldButton = new Ext.Button({ - text: '', - cls: 'gq-btn', - handler: function () { - this.foldPanel(qualExtra, self.qualFolded = !self.qualFolded); + cls : 'gq-btn btn-refresh gq-btn-recalc-all', + handler : function( b, e ) { + b.setText( "Recalculate score and refresh page (takes a minute)" ); + b.setDisabled( true ); + ExpressionExperimentController.runGeeq( self.experimentDetails.id, "all", { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this + } ); + + var recalcButtonWrap = new Ext.Panel( { + cls : 'extjs-sucks', + defaults : { + border : false, + padding : 0 + } + } ); + + recalcButtonWrap.add( gqRecalcButton ); + rightPanel.add( recalcButtonWrap ); + + eeRow.add( rightPanel ); + + this.add( eeRow ); + }, + + suitabilityRenderer : function( ee, mgr ) { + var panel = new Ext.Panel( { + defaults : { + border : false, + padding : 0 + }, + items : [ { + html : '

Suitability

' + } ] + } ); + + var sHead = new Ext.Panel( { + cls : 'gq-head', + defaults : { + border : false, + padding : 0 + } + } ); + + var suitExtra = this.suitExtraRendeder( ee ); + sHead.add( this.geeqRowRenderer( "Public suitability score", ee.geeq.publicSuitabilityScore, + "This is the suitability score that is currently publicly displayed.", "", 2, null, suitExtra, true ) ); + if ( this.allowScoreOverride ) sHead.add( suitExtra ); + this.allowSuitInput( ee.geeq.manualSuitabilityOverride ); + + + panel.add( sHead ); + + var sBody = new Ext.Panel( { + cls : 'gq-body', + defaults : { + border : false, + padding : 0 + } + } ); + + var detailsButtonWrap = new Ext.Panel( { + cls : 'extjs-sucks', + defaults : { + border : false, + padding : 0 + } + } ); + + var detailsButton = this.detailsButtonRenderer( sBody ); + + detailsButtonWrap.add( detailsButton ); + panel.add( detailsButtonWrap ); + + var sPubDesc = + Number( ee.geeq.sScorePublication ) === -1 ? "Experiment has no publication, try filling it in." : + "Experiment does have a publication filled in properly."; + + var sPlatfAmntDesc = + Number( ee.geeq.sScorePlatformAmount ) === -1 ? "Experiment is on more than 2 platforms. Consider splitting the experiment." : + Number( ee.geeq.sScorePlatformAmount ) === -0.5 ? "Experiment has 2 platforms. Consider splitting the experiment." : + "Experiment is on a single platform."; + + var sPlatfTechDesc = + Number( ee.geeq.sScorePlatformsTechMulti ) === -1 ? "Experiment has two or more platforms that use different technologies. Experiment should be split." : "" + + "All used platforms use the same technology."; + + var sPlatfPopDesc = + Number( ee.geeq.sScoreAvgPlatformPopularity ) === -1 ? "Platform(s) used (on average) by less than 10 experiments." : + Number( ee.geeq.sScoreAvgPlatformPopularity ) === -0.5 ? "Platform(s) used (on average) by less than 20 experiments." : + Number( ee.geeq.sScoreAvgPlatformPopularity ) === 0.0 ? 
"Platform(s) used (on average) by less than 50 experiments." : + Number( ee.geeq.sScoreAvgPlatformPopularity ) === 0.5 ? "Platform(s) used (on average) by less than 100 experiments." : + "Platform(s) used (on average) by at least 100 experiments."; + + var sPlatfSizeDesc = + Number( ee.geeq.sScoreAvgPlatformSize ) === -1 ? "Platform has (or all platforms have on average) very low gene covrage." : + Number( ee.geeq.sScoreAvgPlatformSize ) === -0.5 ? "Platform has (or all platforms have on average) low gene coverage." : + Number( ee.geeq.sScoreAvgPlatformSize ) === 0.0 ? "Platform has (or all platforms have on average) moderate gene coverage." : + Number( ee.geeq.sScoreAvgPlatformSize ) === 0.5 ? "Platform has (or all platforms have on average) good gene coverage." : + "Platform has (or all paltforms have on average) excellent gene coverage."; + + var sSizeDesc = + Number( ee.geeq.sScoreSampleSize ) === -1 ? "The experiment has less than 6 samples or more than 500 samples" : + Number( ee.geeq.sScoreSampleSize ) === -0.3 ? "The experiment has less than 10 samples." : + Number( ee.geeq.sScoreSampleSize ) === 0.0 ? "The experiment has less than 20 samples." : "The experiment has at least 20 samples."; + + var sRawDesc = + Number( ee.geeq.sScoreRawData ) === -1 ? "Experiment has no raw data available (data are from external source). Try obtaining the raw data." + : "We do have raw data available for this experiment."; + + var sMissErr = + ee.geeq.noVectors === true ? "Experiment has no computed vectors, run the vector computation!" : ""; + var sMissDesc = + ee.geeq.noVectors === true ? "There are no computed vectors." : + Number( ee.geeq.sScoreMissingValues ) === -1 ? "Experiment has missing values. Try filling them in, ideally by obtaining raw data." 
: + "There are no missing values."; + + sBody.add( this.geeqRowRenderer( 'Publication', ee.geeq.sScorePublication, + "Checks whether the experiment has a publication.", sPubDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Platforms used', ee.geeq.sScorePlatformAmount, + "The amount of platforms the experiment uses.", sPlatfAmntDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Platforms tech consistency', ee.geeq.sScorePlatformsTechMulti, + "Punishes technology inconsistency of multi-platform experiments.", sPlatfTechDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Platforms usage', ee.geeq.sScoreAvgPlatformPopularity, + "Depends on the popularity (experiments that use the platform) of the used platform. If there are multiple platforms," + + "the popularity is averaged.", sPlatfPopDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Platforms size', ee.geeq.sScoreAvgPlatformSize, + "Depends on the size (the number of elements) of the used platform. If there are multiple platforms, the" + + "size is averaged.", sPlatfSizeDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Sample size', ee.geeq.sScoreSampleSize, + "Depends on the experiments size (number of samples).", sSizeDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Raw data state', ee.geeq.sScoreRawData, + "Checks whether there was raw data available for this experiment.", sRawDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Missing values', ee.geeq.sScoreMissingValues, + "Checks whether the experiment has any missing values.", sMissDesc, 1, sMissErr ) ); + + panel.add( sBody ); + if ( !sMissErr ) { + sBody.hide(); + } + return panel; + }, + + qualityRenderer : function( ee, mgr ) { + var panel = new Ext.Panel( { + defaults : { + border : false, + padding : 0 + }, + items : [ { + html : '

Quality

' + } ] + } ); + + var qHead = new Ext.Panel( { + cls : 'gq-head', + defaults : { + border : false, + padding : 0 + } + } ); + + var qualExtra = this.qualExtraRendeder( ee ); + qHead.add( this.geeqRowRenderer( "Public quality score", ee.geeq.publicQualityScore, + "This is the quality score that is currently publicly displayed.", "", 2, null, qualExtra, true ) ); + if ( this.allowScoreOverride ) qHead.add( qualExtra ); + this.allowQualInput( ee.geeq.manualQualityOverride ); + + panel.add( qHead ); + + var qBody = new Ext.Panel( { + cls : 'gq-body', + defaults : { + border : false, + padding : 0 + } + } ); + + var detailsButtonWrap = new Ext.Panel( { + cls : 'extjs-sucks', + defaults : { + border : false, + padding : 0 + } + } ); + + var detailsButton = this.detailsButtonRenderer( qBody ); + + detailsButtonWrap.add( detailsButton ); + panel.add( detailsButtonWrap ); + + var qOutlErr = + Number( ee.geeq.corrMatIssues ) === 1 ? "The correlation matrix is empty!" : + Number( ee.geeq.corrMatIssues ) === 2 ? "There are NaN values in the correlation matrix." : + ""; + + var qOutlierDesc = + Number( ee.geeq.qScoreOutliers ) === -1 ? "There are detected, non-removed outliers. Removing detected outliers will improve the score." : + "No outliers were detected."; + + var qPlatfTechMultiDesc = + Number( ee.geeq.qScorePlatformsTech ) === -1 ? "The experiment is on a two-color platform." : "" + + "The experiment is NOT on a two-color platform."; + + var qReplErr = + Number( ee.geeq.replicatesIssues ) === 1 ? "There is no experimental design for this experiment" : + Number( ee.geeq.replicatesIssues ) === 2 ? "There are no factor values" : + Number( ee.geeq.replicatesIssues ) === 3 ? "All factor-value combinations have no replicates." : + Number( ee.geeq.replicatesIssues ) === 4 ? "The lowest replicate amount was 0 - this should be impossible, please report" : + ""; + + // These thresholds are defined + var qReplDesc = + Number( ee.geeq.qScoreReplicates ) === -1 ? 
"There is a factor-value combination that has very few or no replicates." : + Number( ee.geeq.qScoreReplicates ) === 0.0 ? "There is a factor-value combination that has moderately few replicates. " : + "All factor-value combinations have a good number of replicates"; + + var qBatchInfoDesc = + Number( ee.geeq.qScoreBatchInfo ) === -1 ? "The experiment has no batch info. Try filling it in." : "" + + "Batch information provided."; + + var qBatchEffErr = + Number( ee.geeq.qScoreBatchInfo ) === -1 ? "There is no batch information" : + Number( ee.geeq.QScoreBatchEffect ) === 0.0 && Number( ee.geeq.QScoreBatchConfound ) < 1 ? "Batch confound detected, batch effect detection skipped." : + ee.geeq.batchCorrected === true ? "Data was batch-corrected." : ""; + + var qBatchEffDesc = + ee.geeq.manualBatchEffectActive === true ? "Manually set value, detected score was: " + ee.geeq.QScoreBatchEffect : + Number( ee.geeq.qScoreBatchInfo ) === -1 ? "There were problems when checking for batch effect." : + Number( ee.geeq.QScoreBatchEffect ) === -1 ? "Experiment has a batch effect; Try to batch-correct." : + Number( ee.geeq.QScoreBatchEffect ) === 0.0 && Number( ee.geeq.QScoreBatchConfound ) < 1 ? "Batch effect score defaults to 0 when data is confounded with batches." : + Number( ee.geeq.QScoreBatchEffect ) === 0.0 ? "The experiment has some evidence for a batch effect. Try to batch-correct." : + "Batch effect considered negligible"; // FIXME: this seems to not be working right when there is a confound; ee.geeq.qStoreBatchConfound is not defined? + + var qBatchConfErr = + Number( ee.geeq.qScoreBatchInfo ) === -1 ? "There is no batch information" : + ""; + + var qBatchConfDesc = + ee.geeq.manualBatchConfoundActive === true ? "Manually set value, detected score was: " + ee.geeq.QScoreBatchConfound : + Number( ee.geeq.QScoreBatchConfound ) === -1 ? "Batch confound has been detected." : + Number( ee.geeq.QScoreBatchConfound ) === 0.0 ? 
"There were problems when checking for batch confound." : + "The experiment does not seem to be confounded with the batches."; + + var bconfExtra = this.bconfExtraRendeder( ee ); + var beffExtra = this.beffExtraRendeder( ee ); + + this.allowBconfRadios( ee.geeq.manualBatchConfoundActive ); + this.allowBeffRadios( ee.geeq.manualBatchEffectActive ); + + qBody.add( this.geeqRowRenderer( 'Mean sample corr.', ee.geeq.qScoreSampleMeanCorrelation, + "[Not included in final score] The actual mean correlation of samples.", "Not included in final score", 4, qOutlErr ) ); + + qBody.add( this.geeqRowRenderer( 'Sample corr. variance', ee.geeq.qScoreSampleCorrelationVariance, + "[Not included in final score] The actual variance of sample correlation.", "Not included in final score", 4, qOutlErr ) ); + + qBody.add( this.geeqRowRenderer( 'Median sample corr.', ee.geeq.qScoreSampleMedianCorrelation, + "The actual median correlation of samples.", "Included in the final score. Can be somewhat improved by removing outliers.", 4, qOutlErr ) ); + + qBody.add( this.geeqRowRenderer( 'Outliers', ee.geeq.qScoreOutliers, + "Depends on the presence of detected (non-removed) outliers. If there are any outliers, the score will be low.", qOutlierDesc, 1, qOutlErr ) ); + + qBody.add( this.geeqRowRenderer( 'Platform technology', ee.geeq.qScorePlatformsTech, + "Checks whether the experiments platform (any one, if there are multiple) is two-color.", qPlatfTechMultiDesc ) ); + + qBody.add( this.geeqRowRenderer( 'Replicates', ee.geeq.qScoreReplicates, + "Checks the replicate amount of all factor-value combinations, and takes the lowest one.", qReplDesc, 1, qReplErr ) ); + + qBody.add( this.geeqRowRenderer( 'Batch info', ee.geeq.qScoreBatchInfo, + "Checks whether the experiment has batch info available.", qBatchInfoDesc ) ); + + qBody.add( this.geeqRowRenderer( 'Batch confound', ee.geeq.qScorePublicBatchConfound, + "Checks whether the experimental data are confounded with batches. 
This value is the currently publicly displayed information.", + qBatchConfDesc, 1, qBatchConfErr, bconfExtra ) ); + qBody.add( bconfExtra ); + + qBody.add( this.geeqRowRenderer( 'Batch effect', ee.geeq.qScorePublicBatchEffect, + "Checks the experimental data for a batch effect. This value is the currently publicly displayed information.", + qBatchEffDesc, 1, qBatchEffErr, beffExtra ) ); + qBody.add( beffExtra ); + + panel.add( qBody ); + if ( !qReplErr && !qOutlErr && !qBatchConfErr ) { + qBody.hide(); + } + return panel; + }, + + detailsButtonRenderer : function( panel ) { + return new Ext.Button( { + text : ' Show score breakdown and details', + cls : 'gq-btn gq-btn-details', + handler : function() { + this.showPanel( panel, !panel.isVisible() ) + }, + scope : this + } ); + }, + + bconfExtraRendeder : function( ee ) { + + this.bconfFolded = !ee.geeq.manualBatchConfoundActive; + + var bconfExtra = new Ext.Panel( { + cls : 'gq-extra' + (this.bconfFolded ? ' folded' : ''), + defaults : { + border : false, + padding : 0 + } + } ); + + var self = this; + var foldButton = new Ext.Button( { + text : '', + cls : 'gq-btn', + handler : function() { + this.foldPanel( bconfExtra, self.bconfFolded = !self.bconfFolded ); + }, + scope : this + } ); + + bconfExtra.add( foldButton ); + + bconfExtra.add( new Ext.Button( { + text : 'Re-score batch info', + tooltip : 'Run geeq only for the batch info related scores (refreshes page).', + handler : function( b, e ) { + b.setText( "Re-score batch info" ); + b.setDisabled( true ); + ExpressionExperimentController.runGeeq( self.experimentDetails.id, "batch", { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this, + cls : 'btn-refresh gq-subscore-refresh-btn' + } ) ); + + bconfExtra.add( new Ext.form.Checkbox( { + xtype : 'checkbox', + id : 'gq-bconf-override', + boxLabel : 'Override:', + hideLabel : false, + checked : ee.geeq.manualBatchConfoundActive, + handler : function( el, value ) { + 
self.allowBconfRadios( value ); + ee.geeq.manualBatchConfoundActive = value; + document.getElementById( 'bconf-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.bconfNotifySaved + } ); + } + } ) ); + + bconfExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-bconf-override-value-true', + name : 'gq-bconf-override-value', + boxLabel : 'Confounded', + hideLabel : false, + checked : ee.geeq.manualHasBatchConfound, + handler : function( el, value ) { + ee.geeq.manualHasBatchConfound = value; + document.getElementById( 'bconf-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.bconfNotifySaved + } ); + } + } ) ); + + bconfExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-bconf-override-value-false', + name : 'gq-bconf-override-value', + boxLabel : 'Not confounded', + hideLabel : false, + checked : !ee.geeq.manualHasBatchConfound + } ) ); + + bconfExtra.add( {cls : 'gq-notif hidden', html : ''} ); + + return bconfExtra; + }, + + bconfNotifySaved : function() { + var nr = document.getElementById( 'bconf-notification' ); + if ( nr ) { + nr.setAttribute( "hidden", "true" ); + } + }, + + beffExtraRendeder : function( ee ) { + + this.beffFolded = !ee.geeq.manualBatchEffectActive; + + var beffExtra = new Ext.Panel( { + cls : 'gq-extra' + (this.beffFolded ? 
' folded' : ''), + defaults : { + border : false, + padding : 0 + } + } ); + + var self = this; + var foldButton = new Ext.Button( { + text : '', + cls : 'gq-btn', + handler : function() { + this.foldPanel( beffExtra, self.beffFolded = !self.beffFolded ); + }, + scope : this + } ); + + beffExtra.add( foldButton ); + + beffExtra.add( new Ext.Button( { + text : 'Re-score batch info', + tooltip : 'Run geeq only for the batch info related scores (refreshes page).', + handler : function( b, e ) { + b.setText( "Re-score batch info" ); + b.setDisabled( true ); + ExpressionExperimentController.runGeeq( self.experimentDetails.id, "batch", { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this, + cls : 'btn-refresh gq-subscore-refresh-btn' + } ) ); + + beffExtra.add( new Ext.form.Checkbox( { + xtype : 'checkbox', + id : 'gq-beff-override', + boxLabel : 'Override:', + hideLabel : false, + checked : ee.geeq.manualBatchEffectActive, + handler : function( el, value ) { + self.allowBeffRadios( value ); + self.experimentDetails.geeq.manualBatchEffectActive = value; + document.getElementById( 'beff-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( self.experimentDetails.id, self.experimentDetails.geeq, { + callback : self.beffNotifySaved + } ); + } + } ) ); + + beffExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-beff-override-value-strong', + name : 'gq-beff-override-value', + boxLabel : 'Strong', + hideLabel : false, + checked : ee.geeq.manualHasStrongBatchEffect, + handler : function( el, value ) { + if ( !value ) return; // since we have 3 radios, we wil only process the one that was selected + ee.geeq.manualHasStrongBatchEffect = value; + ee.geeq.manualHasNoBatchEffect = !value; + document.getElementById( 'beff-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.beffNotifySaved + } ); + } + } ) ); 
+ + beffExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-beff-override-value-weak', + name : 'gq-beff-override-value', + boxLabel : 'Weak', + hideLabel : false, + checked : !ee.geeq.manualHasStrongBatchEffect && !ee.geeq.manualHasNoBatchEffect, + handler : function( el, value ) { + if ( !value ) return; // since we have 3 radios, we wil only process the one that was selected + ee.geeq.manualHasStrongBatchEffect = !value; + ee.geeq.manualHasNoBatchEffect = !value; + document.getElementById( 'beff-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.beffNotifySaved + } ); + } + } ) ); + + beffExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-beff-override-value-none', + name : 'gq-beff-override-value', + boxLabel : 'No batch effect', + hideLabel : false, + checked : ee.geeq.manualHasNoBatchEffect, + handler : function( el, value ) { + if ( !value ) return; // since we have 3 radios, we wil only process the one that was selected + ee.geeq.manualHasStrongBatchEffect = !value; + ee.geeq.manualHasNoBatchEffect = value; + document.getElementById( 'beff-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.beffNotifySaved + } ); + } + } ) ); + + beffExtra.add( {cls : 'gq-notif hidden', html : ''} ); + + return beffExtra; + }, + + beffNotifySaved : function() { + var nr = document.getElementById( 'beff-notification' ); + if ( nr ) { + nr.setAttribute( "hidden", "true" ); + } + }, + + qualExtraRendeder : function( ee ) { + + this.qualFolded = !ee.geeq.manualQualityOverride; + + var qualExtra = new Ext.Panel( { + cls : 'gq-extra' + (this.qualFolded ? 
' folded' : ''), + defaults : { + border : false, + padding : 0 + } + } ); + + var self = this; + var foldButton = new Ext.Button( { + text : '', + cls : 'gq-btn', + handler : function() { + this.foldPanel( qualExtra, self.qualFolded = !self.qualFolded ); + }, + scope : this + } ); + + qualExtra.add( foldButton ); + + qualExtra.add( new Ext.Panel( { + cls : 'gq-qual-warning', + defaults : { + border : false, + padding : 0 }, - scope: this - }); - - qualExtra.add(foldButton); - - qualExtra.add(new Ext.Panel({ - cls: 'gq-qual-warning', - defaults: { - border: false, - padding: 0 - }, - items: [ - { - html: - "
" + - "" + - "" + - "

Changing the score manually is a last resort measure, that should not be used on regular basis.

" + - "

Please consult this step with your supervisor.

" + - "
" + - "
" + - "
" - } - ] - }) - ); - - qualExtra.add(new Ext.form.Checkbox({ - xtype: 'checkbox', - id: 'gq-qual-override', - boxLabel: 'Override public score?', - hideLabel: false, - checked: ee.geeq.manualQualityOverride, - handler: function (el, value) { - self.allowQualInput(value); - ee.geeq.manualQualityOverride = value; - if (value) ee.geeq.manualQualityScore = Number(document.getElementById('gq-qual-override-value').value); - } - })); - - var qval = (ee.geeq.manualQualityScore ? ee.geeq.manualQualityScore : ee.geeq.detectedQualityScore); - qualExtra.add({ - cls: "gq-override-value-wrap", - html: + items : [ + { + html : + "
" + + "" + + "" + + "

Changing the score manually is a last resort measure, that should not be used on regular basis.

" + + "

Please consult this step with your supervisor.

" + + "
" + + "
" + + "
" + } + ] + } ) + ); + + qualExtra.add( new Ext.form.Checkbox( { + xtype : 'checkbox', + id : 'gq-qual-override', + boxLabel : 'Override public score?', + hideLabel : false, + checked : ee.geeq.manualQualityOverride, + handler : function( el, value ) { + self.allowQualInput( value ); + ee.geeq.manualQualityOverride = value; + if ( value ) ee.geeq.manualQualityScore = Number( document.getElementById( 'gq-qual-override-value' ).value ); + } + } ) ); + + var qval = (ee.geeq.manualQualityScore ? ee.geeq.manualQualityScore : ee.geeq.detectedQualityScore); + qualExtra.add( { + cls : "gq-override-value-wrap", + html : " " - }); - - qualExtra.add(new Ext.slider.SingleSlider({ - id: 'gq-qual-override-value-slider', - cls: 'gq-override-value-slider', - name: 'gq-qual-override-value-slider', - width: 200, - value: ((ee.geeq.manualQualityScore ? ee.geeq.manualQualityScore : ee.geeq.detectedQualityScore) + 1) * 10, - increment: 1, - minValue: 0, - maxValue: 20, - hideLabel: true, - clickToChange: true, - listeners: { - change: function (el, val) { - var nr = document.getElementById('gq-qual-override-value'); - nr.value = (Math.round(val) / 10 - 1).toFixed(1); - nr.style.background = scoreToColor(Number(nr.value)); - ee.geeq.manualQualityScore = nr.value; - } + } ); + + qualExtra.add( new Ext.slider.SingleSlider( { + id : 'gq-qual-override-value-slider', + cls : 'gq-override-value-slider', + name : 'gq-qual-override-value-slider', + width : 200, + value : ((ee.geeq.manualQualityScore ? 
ee.geeq.manualQualityScore : ee.geeq.detectedQualityScore) + 1) * 10, + increment : 1, + minValue : 0, + maxValue : 20, + hideLabel : true, + clickToChange : true, + listeners : { + change : function( el, val ) { + var nr = document.getElementById( 'gq-qual-override-value' ); + nr.value = (Math.round( val ) / 10 - 1).toFixed( 1 ); + nr.style.background = scoreToColor( Number( nr.value ) ); + ee.geeq.manualQualityScore = nr.value; } - })); - - var saveButton = new Ext.Button({ - text: ' Save changes', - cls: 'gq-btn-save', - handler: function (el, value) { - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: function () { - window.location.reload(); - } - }); + } + } ) ); + + var saveButton = new Ext.Button( { + text : ' Save changes', + cls : 'gq-btn-save', + handler : function( el, value ) { + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this + } ); + qualExtra.add( saveButton ); + + return qualExtra; + }, + + suitExtraRendeder : function( ee ) { + + this.suitFolded = !ee.geeq.manualSuitabilityOverride; + + var suitExtra = new Ext.Panel( { + cls : 'gq-extra' + (this.suitFolded ? ' folded' : ''), + defaults : { + border : false, + padding : 0 + } + } ); + + var self = this; + var foldButton = new Ext.Button( { + text : '', + cls : 'gq-btn', + handler : function() { + this.foldPanel( suitExtra, self.suitFolded = !self.suitFolded ); + }, + scope : this + } ); + + suitExtra.add( foldButton ); + + suitExtra.add( new Ext.Panel( { + cls : 'gq-suit-warning', + defaults : { + border : false, + padding : 0 }, - scope: this - }); - qualExtra.add(saveButton); - - return qualExtra; - }, - - suitExtraRendeder: function (ee) { - - this.suitFolded = !ee.geeq.manualSuitabilityOverride; - - var suitExtra = new Ext.Panel({ - cls: 'gq-extra' + (this.suitFolded ? 
' folded' : ''), - defaults: { - border: false, - padding: 0 - } - }); - - var self = this; - var foldButton = new Ext.Button({ - text: '', - cls: 'gq-btn', - handler: function () { - this.foldPanel(suitExtra, self.suitFolded = !self.suitFolded); - }, - scope: this - }); - - suitExtra.add(foldButton); - - suitExtra.add(new Ext.Panel({ - cls: 'gq-suit-warning', - defaults: { - border: false, - padding: 0 - }, - items: [ - { - html: - "
" + - "" + - "" + - "

Changing the score manually is a last resort measure, that should not be used on regular basis.

" + - "

Please consult this step with your supervisor.

" + - "
" + - "
" + - "
" - } - ] - }) - ); - - suitExtra.add(new Ext.form.Checkbox({ - xtype: 'checkbox', - id: 'gq-suit-override', - boxLabel: 'Override public score?', - hideLabel: false, - checked: ee.geeq.manualSuitabilityOverride, - handler: function (el, value) { - self.allowSuitInput(value); - ee.geeq.manualSuitabilityOverride = value; - if (value) ee.geeq.manualSuitabilityScore = Number(document.getElementById('gq-suit-override-value').value); - } - })); - - var sval = (ee.geeq.manualSuitabilityScore ? ee.geeq.manualSuitabilityScore : ee.geeq.detectedSuitabilityScore); - suitExtra.add({ - cls: "gq-override-value-wrap", - html: + items : [ + { + html : + "
" + + "" + + "" + + "

Changing the score manually is a last resort measure, that should not be used on regular basis.

" + + "

Please consult this step with your supervisor.

" + + "
" + + "
" + + "
" + } + ] + } ) + ); + + suitExtra.add( new Ext.form.Checkbox( { + xtype : 'checkbox', + id : 'gq-suit-override', + boxLabel : 'Override public score?', + hideLabel : false, + checked : ee.geeq.manualSuitabilityOverride, + handler : function( el, value ) { + self.allowSuitInput( value ); + ee.geeq.manualSuitabilityOverride = value; + if ( value ) ee.geeq.manualSuitabilityScore = Number( document.getElementById( 'gq-suit-override-value' ).value ); + } + } ) ); + + var sval = (ee.geeq.manualSuitabilityScore ? ee.geeq.manualSuitabilityScore : ee.geeq.detectedSuitabilityScore); + suitExtra.add( { + cls : "gq-override-value-wrap", + html : " " - }); - - suitExtra.add(new Ext.slider.SingleSlider({ - id: 'gq-suit-override-value-slider', - cls: 'gq-override-value-slider', - name: 'gq-suit-override-value-slider', - width: 200, - value: ((ee.geeq.manualSuitabilityScore ? ee.geeq.manualSuitabilityScore : ee.geeq.detectedSuitabilityScore) + 1) * 10, - increment: 1, - minValue: 0, - maxValue: 20, - hideLabel: true, - clickToChange: true, - listeners: { - change: function (el, val) { - var nr = document.getElementById('gq-suit-override-value'); - nr.value = (Math.round(val) / 10 - 1).toFixed(1); - nr.style.background = scoreToColor(Number(nr.value)); - ee.geeq.manualSuitabilityScore = nr.value; - } + } ); + + suitExtra.add( new Ext.slider.SingleSlider( { + id : 'gq-suit-override-value-slider', + cls : 'gq-override-value-slider', + name : 'gq-suit-override-value-slider', + width : 200, + value : ((ee.geeq.manualSuitabilityScore ? 
ee.geeq.manualSuitabilityScore : ee.geeq.detectedSuitabilityScore) + 1) * 10, + increment : 1, + minValue : 0, + maxValue : 20, + hideLabel : true, + clickToChange : true, + listeners : { + change : function( el, val ) { + var nr = document.getElementById( 'gq-suit-override-value' ); + nr.value = (Math.round( val ) / 10 - 1).toFixed( 1 ); + nr.style.background = scoreToColor( Number( nr.value ) ); + ee.geeq.manualSuitabilityScore = nr.value; } - })); - - var saveButton = new Ext.Button({ - text: ' Save changes', - cls: 'gq-btn-save', - handler: function () { - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: function () { - window.location.reload(); - } - }); - }, - scope: this - }); - suitExtra.add(saveButton); - - return suitExtra; - }, - - allowBeffRadios: function (allow) { - Ext.getCmp('gq-beff-override-value-strong').setDisabled(!allow); - Ext.getCmp('gq-beff-override-value-weak').setDisabled(!allow); - Ext.getCmp('gq-beff-override-value-none').setDisabled(!allow); - }, - - allowBconfRadios: function (allow) { - Ext.getCmp('gq-bconf-override-value-true').setDisabled(!allow); - Ext.getCmp('gq-bconf-override-value-false').setDisabled(!allow); - }, - - allowQualInput: function (allow) { - Ext.getCmp('gq-qual-override-value-slider').setDisabled(!allow); - var nr = document.getElementById('gq-qual-override-value'); - if (nr && !allow) nr.setAttribute("disabled", "true"); - if (nr && allow) nr.removeAttribute("disabled"); - }, - - allowSuitInput: function (allow) { - Ext.getCmp('gq-suit-override-value-slider').setDisabled(!allow); - var nr = document.getElementById('gq-suit-override-value'); - if (nr && !allow) nr.setAttribute("disabled", "true"); - if (nr && allow) nr.removeAttribute("disabled"); - }, - - geeqRowRenderer: function (label, value, labelDesc, valueDesc, valDecimals, warning, extra, normalizeColor) { - if (valDecimals === undefined) valDecimals = 1; - var valColor = normalizeColor ? 
scoreToColorNormalized(Number(value)) : scoreToColor(Number(value)); - var valNumber = roundScore(value, valDecimals); - var cls = valNumber < 0 ? "negative" : "positive"; - var html = - '
' + - ' ' + - '' + - ' ' + label + '' + - ' ' + valNumber + ''; - if (valueDesc) { - html += '' - } - if (warning) { - html += '' - } - html += '
'; - - return { - html: html - }; - }, - - showPanel: function (panel, show) { - if (show) { - panel.show(); - } else { - panel.hide(); - } - }, - - foldPanel: function (panel, fold) { - if (fold) { - panel.addClass("folded"); - } else { - panel.removeClass("folded"); - } - }, + } + } ) ); + + var saveButton = new Ext.Button( { + text : ' Save changes', + cls : 'gq-btn-save', + handler : function() { + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this + } ); + suitExtra.add( saveButton ); + + return suitExtra; + }, + + allowBeffRadios : function( allow ) { + Ext.getCmp( 'gq-beff-override-value-strong' ).setDisabled( !allow ); + Ext.getCmp( 'gq-beff-override-value-weak' ).setDisabled( !allow ); + Ext.getCmp( 'gq-beff-override-value-none' ).setDisabled( !allow ); + }, + + allowBconfRadios : function( allow ) { + Ext.getCmp( 'gq-bconf-override-value-true' ).setDisabled( !allow ); + Ext.getCmp( 'gq-bconf-override-value-false' ).setDisabled( !allow ); + }, + + allowQualInput : function( allow ) { + Ext.getCmp( 'gq-qual-override-value-slider' ).setDisabled( !allow ); + var nr = document.getElementById( 'gq-qual-override-value' ); + if ( nr && !allow ) nr.setAttribute( "disabled", "true" ); + if ( nr && allow ) nr.removeAttribute( "disabled" ); + }, + + allowSuitInput : function( allow ) { + Ext.getCmp( 'gq-suit-override-value-slider' ).setDisabled( !allow ); + var nr = document.getElementById( 'gq-suit-override-value' ); + if ( nr && !allow ) nr.setAttribute( "disabled", "true" ); + if ( nr && allow ) nr.removeAttribute( "disabled" ); + }, + + geeqRowRenderer : function( label, value, labelDesc, valueDesc, valDecimals, warning, extra, normalizeColor ) { + if ( valDecimals === undefined ) valDecimals = 1; + var valColor = normalizeColor ? 
scoreToColorNormalized( Number( value ) ) : scoreToColor( Number( value ) ); + var valNumber = roundScore( value, valDecimals ); + var cls = valNumber < 0 ? "negative" : "positive"; + var html = + '
' + + ' ' + + '' + + ' ' + label + '' + + ' ' + valNumber + ''; + if ( valueDesc ) { + html += '' + } + if ( warning ) { + html += '' + } + html += '
'; + + return { + html : html + }; + }, + + showPanel : function( panel, show ) { + if ( show ) { + panel.show(); + } else { + panel.hide(); + } + }, + + foldPanel : function( panel, fold ) { + if ( fold ) { + panel.addClass( "folded" ); + } else { + panel.removeClass( "folded" ); + } + }, /* batchInfoMissingRenderer: function (ee, mgr) { @@ -969,369 +969,379 @@ Gemma.ExpressionExperimentTools = Ext.extend(Gemma.CurationTools, { return panelBC; },*/ - batchEffectRenderer: function (ee, mgr) { + batchEffectRenderer : function( ee, mgr ) { - var panelBC = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [] - }); + var panelBC = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [] + } ); - var be = (ee.batchEffect !== null && ee.batchEffect !== "") - ? { - html: ' ' - + ee.batchEffect - } - : { - html: ' ' + - (ee.hasBatchInformation === false ? 'No batch info, can not check for batch effect' : 'Batch effect not detected') - }; - - panelBC.add(be); + var hasBatchConfound = ee.batchConfound !== null && ee.batchConfound !== ""; - var recalculateBCBtn = new Ext.Button({ - text: '', - tooltip: "Recalculate batch effect (refreshes page)", - handler: function (b, e) { - ExpressionExperimentController.recalculateBatchEffect(ee.id, { - callback: function () { - window.location.reload(); - } - }); - b.setText(''); - b.setDisabled(true); - }, - scope: this, - cls: 'btn-refresh' - }); - - panelBC.add(recalculateBCBtn); - return panelBC; - }, - - batchConfoundRenderer: function (ee, mgr) { - - var panelBC = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [] - }); - - var be = (ee.batchConfound !== null && ee.batchConfound !== "") + if ( hasBatchConfound ) { + var be = { + html : ' ' + + "Batch effect not determined due to confound." + }; + panelBC.add( be ); + } else { + var be = (ee.batchEffect !== null && ee.batchEffect !== "") ? 
{ - html: ' ' - + ee.batchConfound + html : ' ' + + ee.batchEffect } : { - html: ' Batch confound not detected' + html : ' ' + + (ee.hasBatchInformation === false ? 'No batch info, can not check for batch effect' : 'Batch effect not detected') }; - panelBC.add(be); - var recalculateBCBtn = new Ext.Button({ - text: '', - tooltip: 'Recalculate batch confound (refreshes page)', - handler: function (b, e) { - ExpressionExperimentController.recalculateBatchConfound(ee.id, { - callback: function () { - window.location.reload(); - } - }); - b.setText(''); - b.setDisabled(true); - }, - scope: this, - cls: 'btn-refresh' - }); - - panelBC.add(recalculateBCBtn); - return panelBC; - }, - - linkAnalysisPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Link Analysis: ' - }] - }); - var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Missing value computation (popup, refreshes page)', - handler: manager.doLinks.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); - if (ee.dateLinkAnalysis) { - var type = ee.linkAnalysisEventType; - var color = "#000"; - var suggestRun = true; - var qtip = 'ext:qtip="Analysis was OK"'; - if (type == 'FailedLinkAnalysisEvent') { - color = 'red'; - qtip = 'ext:qtip="Analysis failed"'; - } else if (type == 'TooSmallDatasetLinkAnalysisEvent') { - color = '#CCC'; - qtip = 'ext:qtip="Dataset is too small"'; - suggestRun = false; - } - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateLinkAnalysis) - }); - // disable through gui + panelBC.add( be ); + } + + var recalculateBCBtn = new Ext.Button( { + text : '', + tooltip : "Recalculate batch effect (refreshes page)", + handler : function( b, e ) { + ExpressionExperimentController.recalculateBatchEffect( ee.id, { + callback : function() { + window.location.reload(); + } + } ); + b.setText( '' ); + b.setDisabled( true ); + }, + scope : this, + cls : 
'btn-refresh' + } ); + + panelBC.add( recalculateBCBtn ); + return panelBC; + }, + + batchConfoundRenderer : function( ee, mgr ) { + + var panelBC = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [] + } ); + + var be = (ee.batchConfound !== null && ee.batchConfound !== "") + ? { + html : ' ' + + ee.batchConfound + } + : { + html : ' Batch confound not detected' + }; + + panelBC.add( be ); + var recalculateBCBtn = new Ext.Button( { + text : '', + tooltip : 'Recalculate batch confound (refreshes page)', + handler : function( b, e ) { + ExpressionExperimentController.recalculateBatchConfound( ee.id, { + callback : function() { + window.location.reload(); + } + } ); + b.setText( '' ); + b.setDisabled( true ); + }, + scope : this, + cls : 'btn-refresh' + } ); + + panelBC.add( recalculateBCBtn ); + return panelBC; + }, + + linkAnalysisPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Link Analysis: ' + } ] + } ); + var id = ee.id; + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Missing value computation (popup, refreshes page)', + handler : manager.doLinks.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); + if ( ee.dateLinkAnalysis ) { + var type = ee.linkAnalysisEventType; + var color = "#000"; + var suggestRun = true; + var qtip = 'ext:qtip="Analysis was OK"'; + if ( type == 'FailedLinkAnalysisEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Analysis failed"'; + } else if ( type == 'TooSmallDatasetLinkAnalysisEvent' ) { + color = '#CCC'; + qtip = 'ext:qtip="Dataset is too small"'; + suggestRun = false; + } + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateLinkAnalysis ) + } ); + // disable through gui // if (suggestRun) { // panel.add(runBtn); // } - return panel; - } else { - panel.add({ - html: 'May be eligible; perform via CLI ' - }); - // 
disable through gui - // panel.add(runBtn); - return panel; - } - - }, - - missingValueAnalysisPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Missing values: ' - }] - }); - var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Missing value computation (popup, refreshes page)', - handler: manager.doMissingValues.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); - /* - * Offer missing value analysis if it's possible (this might need tweaking). - */ - if (ee.technologyType != 'ONECOLOR' && ee.technologyType != 'SEQUENCING' && ee.technologyType != 'GENELIST' && ee.hasEitherIntensity) { - - if (ee.dateMissingValueAnalysis) { - var type = ee.missingValueAnalysisEventType; - var color = "#000"; - var suggestRun = true; - var qtip = 'ext:qtip="OK"'; - if (type == 'FailedMissingValueAnalysisEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; - } - - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateMissingValueAnalysis) + ' ' - }); - if (suggestRun) { - panel.add(runBtn); - } - return panel; - } else { - panel.add({ - html: 'Needed ' - }); - // panel.add(runBtn); - return panel; - } - - } else { - - panel - .add({ - html: 'NA' - }); - return panel; - } - }, - - processedVectorCreatePanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Preprocessing: ' - }] - }); - var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Preprocess including PCA, correlation matrix and M-V (popup, refreshes page)', - handler: manager.doProcessedVectors.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); - if (ee.dateProcessedDataVectorComputation) { - var type = ee.processedDataVectorComputationEventType; + return panel; + } else { + panel.add( { + html : 'May be eligible; perform via CLI ' + 
} ); + // disable through gui + // panel.add(runBtn); + return panel; + } + + }, + + missingValueAnalysisPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Missing values: ' + } ] + } ); + var id = ee.id; + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Missing value computation (popup, refreshes page)', + handler : manager.doMissingValues.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); + /* + * Offer missing value analysis if it's possible (this might need tweaking). + */ + if ( ee.technologyType != 'ONECOLOR' && ee.technologyType != 'SEQUENCING' && ee.technologyType != 'GENELIST' && ee.hasEitherIntensity ) { + + if ( ee.dateMissingValueAnalysis ) { + var type = ee.missingValueAnalysisEventType; var color = "#000"; - var suggestRun = true; var qtip = 'ext:qtip="OK"'; - if (type == 'FailedProcessedVectorComputationEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; + if ( type == 'FailedMissingValueAnalysisEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; } - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateProcessedDataVectorComputation) + ' ' - }); - if (suggestRun) { - panel.add(runBtn); + + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateMissingValueAnalysis ) + ' ' + } ); + if ( suggestRun ) { + panel.add( runBtn ); } return panel; - } else { - panel.add({ - html: 'Needed ' - }); - panel.add(runBtn); + } else { + panel.add( { + html : 'Needed ' + } ); + // panel.add(runBtn); return panel; - } - }, + } + + } else { + + panel + .add( { + html : 'NA' + } ); + return panel; + } + }, + + processedVectorCreatePanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Preprocessing: ' + } ] + } ); + var id = ee.id; + var runBtn = new Ext.Button( { + text : '', + 
tooltip : 'Preprocess including PCA, correlation matrix and M-V (popup, refreshes page)', + handler : manager.doProcessedVectors.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); + if ( ee.dateProcessedDataVectorComputation ) { + var type = ee.processedDataVectorComputationEventType; + var color = "#000"; + + var suggestRun = true; + var qtip = 'ext:qtip="OK"'; + if ( type == 'FailedProcessedVectorComputationEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; + } + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateProcessedDataVectorComputation ) + ' ' + } ); + if ( suggestRun ) { + panel.add( runBtn ); + } + return panel; + } else { + panel.add( { + html : 'Needed ' + } ); + panel.add( runBtn ); + return panel; + } + }, + + differentialAnalysisPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Differential Expression Analysis: ' + } ] + } ); + + if ( !ee.suitableForDEA ) { + var color = "#000"; + panel.add( { + html : '' + + 'Not suitable' + ' ' + } ); + return panel; + } - differentialAnalysisPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Differential Expression Analysis: ' - }] - }); + var id = ee.id; + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Differential expression analysis (popup, refreshes page)', + handler : manager.doDifferential.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); - if (!ee.suitableForDEA) { - var color = "#000"; - panel.add({ - html: '' - + 'Not suitable' + ' ' - }); - return panel; - } - - var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Differential expression analysis (popup, refreshes page)', - handler: manager.doDifferential.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); + if 
( ee.numPopulatedFactors > 0 ) { + if ( ee.dateDifferentialAnalysis ) { + var type = ee.differentialAnalysisEventType; - if (ee.numPopulatedFactors > 0) { - if (ee.dateDifferentialAnalysis) { - var type = ee.differentialAnalysisEventType; - - var color = "#000"; - var suggestRun = true; - var qtip = 'ext:qtip="OK"'; - if (type == 'FailedDifferentialExpressionAnalysisEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; - } - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateDifferentialAnalysis) + ' ' - }); - if (suggestRun) { - panel.add(runBtn); - } - return panel; - } else { - - panel.add({ - html: 'Needed ' - }); - panel.add(runBtn); - return panel; + var color = "#000"; + var suggestRun = true; + var qtip = 'ext:qtip="OK"'; + if ( type == 'FailedDifferentialExpressionAnalysisEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; } - } else { + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateDifferentialAnalysis ) + ' ' + } ); + if ( suggestRun ) { + panel.add( runBtn ); + } + return panel; + } else { - panel.add({ - html: 'NA' - }); + panel.add( { + html : 'Needed ' + } ); + panel.add( runBtn ); return panel; - } - }, - - renderProcessedExpressionVectorCount: function (e) { - return e.processedExpressionVectorCount ? e.processedExpressionVectorCount : ' [count not available] '; - }, - - /* - * This really replaces the PCA panel - allows for refresh of the diagnostics (PCA, sample correlation and MV) - */ - diagnosticsPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Diagnostics (PCA, MV, Sample Corr, GEEQ): ' - }] - }); + } + } else { + + panel.add( { + html : 'NA' + } ); + return panel; + } + }, + + renderProcessedExpressionVectorCount : function( e ) { + return e.processedExpressionVectorCount ? 
e.processedExpressionVectorCount : ' [count not available] '; + }, + + /* + * This really replaces the PCA panel - allows for refresh of the diagnostics (PCA, sample correlation and MV) + */ + diagnosticsPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Diagnostics (PCA, MV, Sample Corr, GEEQ): ' + } ] + } ); var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Update diagnostics (popup, refreshes page)', - handler: manager.doDiagnostics.createDelegate(this, [id, true]), - scope: this, - cls: 'btn-refresh' - }); + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Update diagnostics (popup, refreshes page)', + handler : manager.doDiagnostics.createDelegate( this, [ id, true ] ), + scope : this, + cls : 'btn-refresh' + } ); // Get date and info. Note that we don't have a date for the diagnostics all together, so this can be improved. - if (ee.datePcaAnalysis) { - var type = ee.pcaAnalysisEventType; - - var color = "#000"; - var qtip = 'ext:qtip="OK"'; - var suggestRun = true; - - if (type == 'FailedPCAAnalysisEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; - } - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.datePcaAnalysis) + ' ' - }); + if ( ee.datePcaAnalysis ) { + var type = ee.pcaAnalysisEventType; + + var color = "#000"; + var qtip = 'ext:qtip="OK"'; + var suggestRun = true; + + if ( type == 'FailedPCAAnalysisEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; + } + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.datePcaAnalysis ) + ' ' + } ); } else - panel.add({ - html: 'Needed ' - }); + panel.add( { + html : 'Needed ' + } ); - panel.add(runBtn); + panel.add( runBtn ); return panel; - }, + }, - // removed in place of general diagnostics one. + // removed in place of general diagnostics one. 
// /* // * Get the last date PCA was run, add a button to run PCA // */ @@ -1382,77 +1392,77 @@ Gemma.ExpressionExperimentTools = Ext.extend(Gemma.CurationTools, { // // }, - /* - * Get the last date batch info was downloaded, add a button to download - */ - batchPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Batch Information: ' - }] - }); - var id = ee.id; - var hasBatchInformation = ee.hasBatchInformation; - var technologyType = ee.technologyType; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Batch information (popup, refreshes page)', - // See EEManager.js doBatchInfoFetch(id) - handler: manager.doBatchInfoFetch.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); - - // Batch info fetching not allowed for RNA seq and other non-microarray data - if (technologyType == 'NONE') { - panel.add({ - html: '' + 'NA' + ' ' - }); - return panel; - } - - // If present, display the date and info. If batch information exists without date, display 'Provided'. - // If no batch information, display 'Needed' with button for GEO and ArrayExpress data. Otherwise, NA. 
- if (ee.dateBatchFetch) { - var type = ee.batchFetchEventType; - - var color = "#000"; - var qtip = 'ext:qtip="OK"'; - - if (type == 'FailedBatchInformationFetchingEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; - } else if (type == 'FailedBatchInformationMissingEvent') { - color = '#CCC'; - qtip = 'ext:qtip="Raw data files not available from source"'; - } + /* + * Get the last date batch info was downloaded, add a button to download + */ + batchPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Batch Information: ' + } ] + } ); + var id = ee.id; + var hasBatchInformation = ee.hasBatchInformation; + var technologyType = ee.technologyType; + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Batch information (popup, refreshes page)', + // See EEManager.js doBatchInfoFetch(id) + handler : manager.doBatchInfoFetch.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); + + // Batch info fetching not allowed for RNA seq and other non-microarray data + if ( technologyType == 'NONE' ) { + panel.add( { + html : '' + 'NA' + ' ' + } ); + return panel; + } + + // If present, display the date and info. If batch information exists without date, display 'Provided'. + // If no batch information, display 'Needed' with button for GEO and ArrayExpress data. Otherwise, NA. 
+ if ( ee.dateBatchFetch ) { + var type = ee.batchFetchEventType; + + var color = "#000"; + var qtip = 'ext:qtip="OK"'; + + if ( type == 'FailedBatchInformationFetchingEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; + } else if ( type == 'FailedBatchInformationMissingEvent' ) { + color = '#CCC'; + qtip = 'ext:qtip="Raw data files not available from source"'; + } + + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateBatchFetch ) + ' ' + } ); + panel.add( runBtn ); + } else if ( hasBatchInformation ) { + panel.add( { + html : 'Provided' + } ); + } else if ( ee.externalDatabase == "GEO" || ee.externalDatabase == "ArrayExpress" ) { + panel.add( { + html : 'Needed ' + } ); + panel.add( runBtn ); + } else + panel.add( { + html : '' + 'NA' + + ' ' + } ); - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateBatchFetch) + ' ' - }); - panel.add(runBtn); - } else if (hasBatchInformation) { - panel.add({ - html: 'Provided' - }); - } else if (ee.externalDatabase == "GEO" || ee.externalDatabase == "ArrayExpress") { - panel.add({ - html: 'Needed ' - }); - panel.add(runBtn); - } else - panel.add({ - html: '' + 'NA' - + ' ' - }); - - return panel; - } -}); + return panel; + } +} ); diff --git a/gemma-web/src/main/webapp/scripts/api/entities/platform/SequenceDetailsPanel.js b/gemma-web/src/main/webapp/scripts/api/entities/platform/SequenceDetailsPanel.js index d62d772454..95a9d160de 100644 --- a/gemma-web/src/main/webapp/scripts/api/entities/platform/SequenceDetailsPanel.js +++ b/gemma-web/src/main/webapp/scripts/api/entities/platform/SequenceDetailsPanel.js @@ -105,8 +105,8 @@ Gemma.SequenceDetailsPanel = Ext html: { tag: 'div', html: seq.sequence, - cls: 'clob', - style: 'word-wrap: break-word;width:500px;height:100px;padding:4px;margin:3px;font-size:0.9em;font-family:monospace' + cls: 'clob smaller', + style: 'word-wrap: break-word;width:500px;height:100px;padding:4px;margin:3px;font-family:monospace' } }); } diff --git 
a/gemma-web/src/main/webapp/scripts/api/visualization/VisualizationWidget.js b/gemma-web/src/main/webapp/scripts/api/visualization/VisualizationWidget.js index 69cdd1be45..65654fc99e 100755 --- a/gemma-web/src/main/webapp/scripts/api/visualization/VisualizationWidget.js +++ b/gemma-web/src/main/webapp/scripts/api/visualization/VisualizationWidget.js @@ -169,9 +169,10 @@ Gemma.prepareProfiles = function( data, showPValues ) { pvalueLabel = sprintf( "%.2e ", pvalue ); } - var labelStyle = ''; + // use a fixed font size that matches the heatmap row height + var labelStyle = 'font-size: 12px'; if ( factor && factor < 2 ) { - labelStyle = "font-style:italic"; + labelStyle += ";font-style:italic"; // qtip = qtip + " [Not significant]"; } diff --git a/gemma-web/src/main/webapp/styles/antisense/responsive.css b/gemma-web/src/main/webapp/styles/antisense/responsive.css index df202e2790..71b744e00c 100644 --- a/gemma-web/src/main/webapp/styles/antisense/responsive.css +++ b/gemma-web/src/main/webapp/styles/antisense/responsive.css @@ -38,6 +38,10 @@ display: none; } +.w-100 { + width: 100%; +} + /* small screens */ @media (min-width: 576px) { .container { diff --git a/pom.xml b/pom.xml index a3614752e3..7d9e1d9478 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ Gemma gemma gemma - 1.31.2 + 1.31.3 2005 The Gemma Project for meta-analysis of genomics data https://gemma.msl.ubc.ca @@ -140,7 +140,7 @@ baseCode baseCode - 1.1.20 + 1.1.21 @@ -496,7 +496,7 @@ 2.22.2 -Dlog4j1.compatibility=true -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager - true + ${redirectTestOutputToFile} **/*Test.java @@ -511,7 +511,7 @@ 2.22.2 -Dlog4j1.compatibility=true -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager - true + ${redirectTestOutputToFile} **/*Test.java @@ -646,5 +646,6 @@ ${skipTests} ${skipTests} + true