Skip to content

Commit

Permalink
feat: endpoint /api/v1/genes/search with OpenAPI (#575)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Nov 13, 2024
1 parent a474dd0 commit 244427e
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 41 deletions.
140 changes: 140 additions & 0 deletions openapi.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,50 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/CustomError'
/api/v1/genes/search:
get:
tags:
- genes_search
summary: Search for genes.
operationId: genesSearch
parameters:
- name: q
in: query
description: The string to search for.
required: true
schema:
type: string
- name: fields
in: query
description: The fields to search in.
required: false
schema:
type:
- array
- 'null'
items:
$ref: '#/components/schemas/GenesFields'
- name: case_sensitive
in: query
description: Enable case sensitive search.
required: false
schema:
type:
- boolean
- 'null'
responses:
'200':
description: Genes search results.
content:
application/json:
schema:
$ref: '#/components/schemas/GenesSearchResponse'
'500':
description: Internal server error.
content:
application/json:
schema:
$ref: '#/components/schemas/CustomError'
/api/v1/versionsInfo:
get:
tags:
Expand Down Expand Up @@ -84,6 +128,45 @@ components:
properties:
err:
type: string
GeneNames:
type: object
description: Identifier / name information for one gene.
required:
- hgnc_id
- symbol
- name
- alias_symbol
- alias_name
properties:
hgnc_id:
type: string
description: HGNC gene ID.
symbol:
type: string
description: HGNC gene symbol.
name:
type: string
description: Gene name from HGNC.
alias_symbol:
type: array
items:
type: string
description: HGNC alias symbols.
alias_name:
type: array
items:
type: string
description: HGNC alias names.
ensembl_gene_id:
type:
- string
- 'null'
description: ENSEMBL gene ID.
ncbi_gene_id:
type:
- string
- 'null'
description: NCBI gene ID.
GenesAcmgSecondaryFindingRecord:
type: object
description: Information from ACMG secondary findings list.
Expand Down Expand Up @@ -982,6 +1065,17 @@ components:
- Gene
- Str
- Region
GenesFields:
type: string
description: The allowed fields to search in.
enum:
- hgnc_id
- symbol
- name
- alias_symbol
- alias_name
- ensembl_gene_id
- ncbi_gene_id
GenesGeneData:
type: object
description: Gene identity information.
Expand Down Expand Up @@ -2131,6 +2225,38 @@ components:
format: int32
minimum: 0
description: PubMed IDs.
GenesSearchQuery:
type: object
description: Parameters for `handle`.
required:
- q
properties:
q:
type: string
description: The string to search for.
fields:
type:
- array
- 'null'
items:
$ref: '#/components/schemas/GenesFields'
description: The fields to search in.
case_sensitive:
type:
- boolean
- 'null'
description: Enable case sensitive search.
GenesSearchResponse:
type: object
description: Result for `handle`.
required:
- genes
properties:
genes:
type: array
items:
$ref: '#/components/schemas/Scored'
description: The resulting gene information.
GenesShetRecord:
type: object
description: Entry with sHet information (Weghorn et al., 2019).
Expand All @@ -2151,6 +2277,20 @@ components:
enum:
- grch37
- grch38
Scored:
type: object
description: A scored result.
required:
- score
- data
properties:
score:
type: number
format: float
description: The score.
data:
$ref: '#/components/schemas/GeneNames'
description: The result.
VersionsAnnotationInfo:
type: object
description: Version information for one database.
Expand Down
109 changes: 71 additions & 38 deletions src/server/run/genes_search.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
//! Implementation of `/genes/search` that allows to search for genes by symbol etc.
//! Implementation of endpoint `/api/v1/genes/search`.
//!
//! Also includes the implementation of the `/genes/search` endpoint (deprecated).
//!
//! Gene identifiers (HGNC, NCBI, ENSEMBL) must match. As for symbols and names, the
//! search string may also be a substring.
use actix_web::{
get,
web::{self, Data, Json, Path},
Responder,
};

use crate::server::run::GeneNames;
Expand All @@ -15,19 +16,19 @@ use serde_with::{formats::CommaSeparator, StringWithSeparator};

/// The allowed fields to search in.
#[derive(
serde::Serialize,
serde::Deserialize,
strum::Display,
strum::EnumString,
Debug,
Clone,
Copy,
PartialEq,
Eq,
strum::Display,
strum::EnumString,
serde::Serialize,
serde::Deserialize,
utoipa::ToSchema,
)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
enum Fields {
pub(crate) enum GenesFields {
/// HGNC ID field
HgncId,
/// Symbol field
Expand All @@ -47,46 +48,48 @@ enum Fields {
/// Parameters for `handle`.
#[serde_with::skip_serializing_none]
#[serde_with::serde_as]
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
#[derive(
Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema, utoipa::IntoParams,
)]
#[serde(rename_all = "snake_case")]
struct Request {
pub(crate) struct GenesSearchQuery {
/// The string to search for.
pub q: String,
/// The fields to search in.
#[serde_as(as = "Option<StringWithSeparator::<CommaSeparator, Fields>>")]
pub fields: Option<Vec<Fields>>,
#[serde_as(as = "Option<StringWithSeparator::<CommaSeparator, GenesFields>>")]
pub fields: Option<Vec<GenesFields>>,
/// Enable case sensitive search.
pub case_sensitive: Option<bool>,
}

/// A scored result.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
struct Scored<T> {
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
pub(crate) struct Scored<T> {
/// The score.
pub score: f32,
/// The result.
pub data: T,
}

/// Alias for scored genes names.
pub(crate) type GenesScoredGeneNames = Scored<GeneNames>;

/// Result for `handle`.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
#[serde_with::skip_serializing_none]
struct Container {
// TODO: add data version
pub(crate) struct GenesSearchResponse {
/// The resulting gene information.
pub genes: Vec<Scored<GeneNames>>,
pub genes: Vec<GenesScoredGeneNames>,
}

/// Query for annotations for one variant.
#[allow(clippy::option_map_unit_fn)]
#[get("/genes/search")]
async fn handle(
/// Implementation of both endpoints.
async fn handle_impl(
data: Data<crate::server::run::WebServerData>,
_path: Path<()>,
query: web::Query<Request>,
) -> actix_web::Result<impl Responder, CustomError> {
query: web::Query<GenesSearchQuery>,
) -> actix_web::Result<Json<GenesSearchResponse>, CustomError> {
if query.q.len() < 2 {
return Ok(Json(Container {
return Ok(Json(GenesSearchResponse {
// server_version: VERSION.to_string(),
// builder_version,
genes: Vec::new(),
Expand Down Expand Up @@ -120,35 +123,36 @@ async fn handle(
val.to_lowercase().contains(&q)
}
};
let fields: Vec<Fields> = if let Some(fields) = query.fields.as_ref() {
let fields: Vec<GenesFields> = if let Some(fields) = query.fields.as_ref() {
fields.clone()
} else {
Vec::new()
};

// The fields contain the given field or are empty.
let fields_contains = |field: &Fields| -> bool { fields.is_empty() || fields.contains(field) };
let fields_contains =
|field: &GenesFields| -> bool { fields.is_empty() || fields.contains(field) };

let mut genes = genes_db
.data
.gene_names
.iter()
.map(|gn| -> Scored<GeneNames> {
let score = if (fields_contains(&Fields::HgncId) && equals_q(&gn.hgnc_id))
|| (fields_contains(&Fields::Symbol) && equals_q(&gn.symbol))
|| (fields_contains(&Fields::Symbol) && equals_q(&gn.symbol))
|| (fields_contains(&Fields::Name) && equals_q(&gn.name))
|| (fields_contains(&Fields::EnsemblGeneId)
let score = if (fields_contains(&GenesFields::HgncId) && equals_q(&gn.hgnc_id))
|| (fields_contains(&GenesFields::Symbol) && equals_q(&gn.symbol))
|| (fields_contains(&GenesFields::Symbol) && equals_q(&gn.symbol))
|| (fields_contains(&GenesFields::Name) && equals_q(&gn.name))
|| (fields_contains(&GenesFields::EnsemblGeneId)
&& gn.ensembl_gene_id.iter().any(|s| equals_q(s)))
|| (fields_contains(&Fields::NcbiGeneId)
|| (fields_contains(&GenesFields::NcbiGeneId)
&& gn.ncbi_gene_id.iter().any(|s| equals_q(s)))
{
1f32
} else if fields_contains(&Fields::Symbol) && contains_q(&gn.symbol) {
} else if fields_contains(&GenesFields::Symbol) && contains_q(&gn.symbol) {
q.len() as f32 / gn.symbol.len() as f32
} else if fields_contains(&Fields::Name) && contains_q(&gn.name) {
} else if fields_contains(&GenesFields::Name) && contains_q(&gn.name) {
q.len() as f32 / gn.name.len() as f32
} else if fields_contains(&Fields::AliasSymbol)
} else if fields_contains(&GenesFields::AliasSymbol)
&& gn.alias_symbol.iter().any(|s| contains_q(s))
{
gn.alias_symbol
Expand All @@ -162,7 +166,7 @@ async fn handle(
})
.max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.unwrap_or(0f32)
} else if fields_contains(&Fields::AliasName)
} else if fields_contains(&GenesFields::AliasName)
&& gn.alias_name.iter().any(|s| contains_q(s))
{
gn.alias_name
Expand Down Expand Up @@ -194,9 +198,38 @@ async fn handle(
.unwrap_or(std::cmp::Ordering::Equal)
});

Ok(Json(Container {
Ok(Json(GenesSearchResponse {
// server_version: VERSION.to_string(),
// builder_version,
genes,
}))
}

/// Search for genes.
#[get("/genes/search")]
async fn handle(
data: Data<crate::server::run::WebServerData>,
path: Path<()>,
query: web::Query<GenesSearchQuery>,
) -> actix_web::Result<Json<GenesSearchResponse>, CustomError> {
handle_impl(data, path, query).await
}

/// Search for genes.
#[utoipa::path(
get,
operation_id = "genesSearch",
params(GenesSearchQuery),
responses(
(status = 200, description = "Genes search results.", body = GenesSearchResponse),
(status = 500, description = "Internal server error.", body = CustomError)
)
)]
#[get("/api/v1/genes/search")]
async fn handle_with_openapi(
data: Data<crate::server::run::WebServerData>,
path: Path<()>,
query: web::Query<GenesSearchQuery>,
) -> actix_web::Result<Json<GenesSearchResponse>, CustomError> {
handle_impl(data, path, query).await
}
Loading

0 comments on commit 244427e

Please sign in to comment.