From 7eeab32d923a05f81de2bd3c22b647f9689a9b39 Mon Sep 17 00:00:00 2001
From: Theo Sanderson
Date: Mon, 24 Feb 2025 22:44:22 +0000
Subject: [PATCH] stats

---
 .../loculus/backend/api/StatisticsResponse.kt | 20 ++++++++++
 .../controller/StatisticsController.kt        | 27 +++++++++++++
 .../submission/SubmissionDatabaseService.kt   | 38 +++++++++++++++++++
 3 files changed, 85 insertions(+)
 create mode 100644 backend/src/main/kotlin/org/loculus/backend/api/StatisticsResponse.kt
 create mode 100644 backend/src/main/kotlin/org/loculus/backend/controller/StatisticsController.kt

diff --git a/backend/src/main/kotlin/org/loculus/backend/api/StatisticsResponse.kt b/backend/src/main/kotlin/org/loculus/backend/api/StatisticsResponse.kt
new file mode 100644
index 0000000000..ef21283f31
--- /dev/null
+++ b/backend/src/main/kotlin/org/loculus/backend/api/StatisticsResponse.kt
@@ -0,0 +1,20 @@
+package org.loculus.backend.api
+
+/**
+ * Response model for the pipeline statistics endpoint: sequence counts by organism and pipeline version.
+ *
+ * @property statistics statistics per organism, keyed by organism name
+ */
+data class PipelineStatisticsResponse(val statistics: Map<String, OrganismPipelineStatistics>)
+
+/**
+ * Statistics for a specific organism.
+ *
+ * @property totalSequences total number of sequences for this organism
+ * @property sequencesByPipelineVersion number of sequences per pipeline version
+ */
+data class OrganismPipelineStatistics(
+    val totalSequences: Int,
+    // NOTE(review): Long key assumed to match SequenceEntriesView.pipelineVersionColumn - confirm
+    val sequencesByPipelineVersion: Map<Long, Int>,
+)
diff --git a/backend/src/main/kotlin/org/loculus/backend/controller/StatisticsController.kt b/backend/src/main/kotlin/org/loculus/backend/controller/StatisticsController.kt
new file mode 100644
index 0000000000..8f83dbc5e0
--- /dev/null
+++ b/backend/src/main/kotlin/org/loculus/backend/controller/StatisticsController.kt
@@ -0,0 +1,27 @@
+package org.loculus.backend.controller
+
+import io.swagger.v3.oas.annotations.Operation
+import io.swagger.v3.oas.annotations.security.SecurityRequirement
+import org.loculus.backend.api.PipelineStatisticsResponse
+import org.loculus.backend.service.submission.SubmissionDatabaseService
+import org.springframework.http.MediaType
+import org.springframework.web.bind.annotation.GetMapping
+import org.springframework.web.bind.annotation.RequestMapping
+import org.springframework.web.bind.annotation.RestController
+
+/**
+ * REST endpoints under `/statistics`; delegates to [SubmissionDatabaseService].
+ */
+@RestController
+@RequestMapping("/statistics")
+@SecurityRequirement(name = "bearerAuth")
+class StatisticsController(
+    private val submissionDatabaseService: SubmissionDatabaseService,
+) {
+    @Operation(
+        description = "Get statistics on sequences processed by pipeline version for each organism",
+    )
+    @GetMapping("/pipeline-versions", produces = [MediaType.APPLICATION_JSON_VALUE])
+    fun getPipelineStatistics(): PipelineStatisticsResponse =
+        PipelineStatisticsResponse(statistics = submissionDatabaseService.getPipelineStatistics())
+}
diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt 
b/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt
index d958075d70..314d0dcb58 100644
--- a/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt
+++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt
@@ -37,6 +37,7 @@ import org.jetbrains.exposed.sql.transactions.transaction
 import org.jetbrains.exposed.sql.update
 import org.jetbrains.exposed.sql.vendors.ForUpdateOption.PostgreSQL.ForUpdate
 import org.jetbrains.exposed.sql.vendors.ForUpdateOption.PostgreSQL.MODE
+import org.loculus.backend.api.OrganismPipelineStatistics
 import org.loculus.backend.api.AccessionVersion
 import org.loculus.backend.api.AccessionVersionInterface
 import org.loculus.backend.api.AccessionVersionOriginalMetadata
@@ -1121,6 +1122,43 @@ class SubmissionDatabaseService(
         SequenceEntriesTable.distinctOrganisms().map { organismName ->
             Pair(organismName, useNewerProcessingPipelineIfPossible(organismName))
         }.toMap()
+
+    /**
+     * Gets statistics on sequences grouped by organism and pipeline version.
+     *
+     * @return the statistics computed by [countSequencesByPipelineVersion] for every organism
+     * reported by [SequenceEntriesTable.distinctOrganisms], keyed by organism name
+     */
+    fun getPipelineStatistics(): Map<String, OrganismPipelineStatistics> =
+        SequenceEntriesTable.distinctOrganisms().associateWith { organismName ->
+            countSequencesByPipelineVersion(Organism(organismName))
+        }
+
+    /**
+     * Counts the [SequenceEntriesView] rows of [organism] with a single grouped query.
+     *
+     * Rows without a pipeline version count towards the total but are left out of the
+     * per-version breakdown.
+     */
+    private fun countSequencesByPipelineVersion(organism: Organism): OrganismPipelineStatistics {
+        val pipelineVersionColumn = SequenceEntriesView.pipelineVersionColumn
+        // One shared expression instance, used both to select the count and to read it back.
+        val countColumn = Count(stringLiteral("*"))
+
+        val countsByVersion = SequenceEntriesView
+            .select(pipelineVersionColumn, countColumn)
+            .where { SequenceEntriesView.organismIs(organism) }
+            .groupBy(pipelineVersionColumn)
+            .map { row -> row[pipelineVersionColumn] to row[countColumn] }
+
+        // The group with a null pipeline version contributes to the total only.
+        return OrganismPipelineStatistics(
+            totalSequences = countsByVersion.sumOf { (_, count) -> count }.toInt(),
+            sequencesByPipelineVersion = countsByVersion
+                .mapNotNull { (version, count) -> version?.let { it to count.toInt() } }
+                .toMap(),
+        )
+    }
 
     /**
      * Looks for new preprocessing pipeline version with [findNewPreprocessingPipelineVersion];