From f37967063b5bd33b524bdcaf4e07c1beddbddff5 Mon Sep 17 00:00:00 2001 From: Colin Diesh Date: Sun, 9 Feb 2025 16:23:03 -0500 Subject: [PATCH] Improve status updating for BEDTabix, GFF3Tabix, VCFTabix, BigBed (#4827) --- .../src/BedTabixAdapter/BedTabixAdapter.ts | 90 +++++++++++------ .../bed/src/BigBedAdapter/BigBedAdapter.ts | 32 +++--- .../src/Gff3TabixAdapter/Gff3TabixAdapter.ts | 98 +++++++++++-------- .../src/VcfTabixAdapter/VcfTabixAdapter.ts | 32 +++--- .../wiggle/src/BigWigAdapter/BigWigAdapter.ts | 36 ++++--- 5 files changed, 177 insertions(+), 111 deletions(-) diff --git a/plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts b/plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts index 4bfa432559..4c73038d1e 100644 --- a/plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts +++ b/plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts @@ -1,7 +1,7 @@ import BED from '@gmod/bed' import { TabixIndexedFile } from '@gmod/tabix' import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter' -import { SimpleFeature } from '@jbrowse/core/util' +import { SimpleFeature, updateStatus } from '@jbrowse/core/util' import { openLocation } from '@jbrowse/core/util/io' import { ObservableCreate } from '@jbrowse/core/util/rxjs' import { checkStopToken } from '@jbrowse/core/util/stopToken' @@ -25,6 +25,10 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter { public static capabilities = ['getFeatures', 'getRefNames'] + setupP?: Promise<{ + meta: Awaited> + }> + public constructor( config: AnyConfigurationModel, getSubAdapter?: getSubAdapterType, @@ -52,15 +56,37 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter { return this.bed.getReferenceSequenceNames(opts) } - async getHeader() { - return this.bed.getHeader() + async getHeader(opts?: BaseOptions) { + return this.bed.getHeader(opts) + } + + async getMetadataPre2(_opts?: BaseOptions) { + if (!this.setupP) { + this.setupP = this.getMetadataPre().catch((e: unknown) => { + this.setupP = undefined + throw e + }) + } + return this.setupP + } + + async getMetadataPre() { + const meta = await this.bed.getMetadata() + return { meta } + } + + async getMetadata(opts?: BaseOptions) { + const { statusCallback = () => {} } = opts || {} + return updateStatus('Downloading index', statusCallback, () => + this.getMetadataPre2(opts), + ) } async getNames() { if (this.columnNames.length) { return this.columnNames } - const header = await this.bed.getHeader() + const header = await this.getHeader() const defs = header.split(/\n|\r\n|\r/).filter(f => !!f) const defline = defs.at(-1) return defline?.includes('\t') @@ -71,10 +97,10 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter { : undefined } - public getFeatures(query: Region, opts: BaseOptions = {}) { - const { stopToken } = opts + public getFeatures(query: Region, opts?: BaseOptions) { + const { stopToken, statusCallback = () => {} } = opts || {} return ObservableCreate(async observer => { - const meta = await this.bed.getMetadata() + const { meta } = await this.getMetadata() const { columnNumbers } = meta const colRef = columnNumbers.ref - 1 const colStart = columnNumbers.start - 1 @@ -82,31 +108,33 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter { const names = await this.getNames() let start = performance.now() checkStopToken(stopToken) - await this.bed.getLines(query.refName, query.start, query.end, { - lineCallback: (line, fileOffset) => { - if (performance.now() - start > 200) { - checkStopToken(stopToken) - start = performance.now() - } - observer.next( - new SimpleFeature( - featureData({ - line, - colRef, - colStart, - colEnd, - scoreColumn: this.scoreColumn, - parser: this.parser, - uniqueId: `${this.id}-${fileOffset}`, - names, - }), - ), - ) - }, - stopToken: opts.stopToken, - }) + await updateStatus('Downloading features', statusCallback, () => + this.bed.getLines(query.refName, query.start, query.end, { + lineCallback: (line, fileOffset) => { + if (performance.now() - start > 200) { + checkStopToken(stopToken) + start = performance.now() + } + observer.next( + new SimpleFeature( + featureData({ + line, + colRef, + colStart, + colEnd, + scoreColumn: this.scoreColumn, + parser: this.parser, + uniqueId: `${this.id}-${fileOffset}`, + names, + }), + ), + ) + }, + stopToken, + }), + ) observer.complete() - }, opts.stopToken) + }, stopToken) } public freeResources(): void {} diff --git a/plugins/bed/src/BigBedAdapter/BigBedAdapter.ts b/plugins/bed/src/BigBedAdapter/BigBedAdapter.ts index b8528db918..490fd97582 100644 --- a/plugins/bed/src/BigBedAdapter/BigBedAdapter.ts +++ b/plugins/bed/src/BigBedAdapter/BigBedAdapter.ts @@ -1,7 +1,13 @@ import { BigBed } from '@gmod/bbi' import BED from '@gmod/bed' import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter' -import { SimpleFeature, doesIntersect2, max, min } from '@jbrowse/core/util' +import { + SimpleFeature, + doesIntersect2, + max, + min, + updateStatus, +} from '@jbrowse/core/util' import { openLocation } from '@jbrowse/core/util/io' import { ObservableCreate } from '@jbrowse/core/util/rxjs' import { firstValueFrom, toArray } from 'rxjs' @@ -95,18 +101,22 @@ export default class BigBedAdapter extends BaseFeatureDataAdapter { allowRedispatch: boolean originalQuery?: Region }) { - const { stopToken } = opts + const { stopToken, statusCallback = () => {} } = opts const scoreColumn = this.getConf('scoreColumn') const aggregateField = this.getConf('aggregateField') - const { parser, bigbed } = await this.configure(opts) - const feats = await bigbed.getFeatures( - query.refName, - query.start, - query.end, - { - stopToken, - basesPerSpan: query.end - query.start, - }, + const { parser, bigbed } = await updateStatus( + 'Downloading header', + statusCallback, + () => this.configure(opts), + ) + const feats = await updateStatus( + 'Downloading features', + statusCallback, + () => + bigbed.getFeatures(query.refName, query.start, query.end, { + stopToken, + basesPerSpan: query.end - query.start, + }), ) if (allowRedispatch && feats.length) { let minStart = Number.POSITIVE_INFINITY diff --git a/plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts b/plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts index 086860f572..f99c4a56fd 100644 --- a/plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts +++ b/plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts @@ -1,6 +1,6 @@ import { TabixIndexedFile } from '@gmod/tabix' -import { readConfObject } from '@jbrowse/core/configuration' import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter' +import { updateStatus } from '@jbrowse/core/util' import { openLocation } from '@jbrowse/core/util/io' import { doesIntersect2 } from '@jbrowse/core/util/range' import { ObservableCreate } from '@jbrowse/core/util/rxjs' @@ -9,10 +9,7 @@ import { parseStringSync } from 'gff-nostream' import { featureData } from '../featureData' -import type PluginManager from '@jbrowse/core/PluginManager' -import type { AnyConfigurationModel } from '@jbrowse/core/configuration' import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter' -import type { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache' import type { Feature } from '@jbrowse/core/util/simpleFeature' import type { Region } from '@jbrowse/core/util/types' import type { Observer } from 'rxjs' @@ -25,48 +22,67 @@ interface LineFeature { } export default class Gff3TabixAdapter extends BaseFeatureDataAdapter { - protected gff: TabixIndexedFile - - protected dontRedispatch: string[] - - public constructor( - config: AnyConfigurationModel, - getSubAdapter?: getSubAdapterType, - pluginManager?: PluginManager, - ) { - super(config, getSubAdapter, pluginManager) - const gffGzLocation = readConfObject(config, 'gffGzLocation') - const indexType = readConfObject(config, ['index', 'indexType']) - const location = readConfObject(config, ['index', 'location']) - const dontRedispatch = readConfObject(config, 'dontRedispatch') - - this.dontRedispatch = dontRedispatch || ['chromosome', 'contig', 'region'] - this.gff = new TabixIndexedFile({ + private configured?: Promise<{ + gff: TabixIndexedFile + dontRedispatch: string[] + }> + + private async configurePre(_opts?: BaseOptions) { + const gffGzLocation = this.getConf('gffGzLocation') + const indexType = this.getConf(['index', 'indexType']) + const loc = this.getConf(['index', 'location']) + const dontRedispatch = this.getConf('dontRedispatch') || [ + 'chromosome', + 'contig', + 'region', + ] + const gff = new TabixIndexedFile({ filehandle: openLocation(gffGzLocation, this.pluginManager), csiFilehandle: - indexType === 'CSI' - ? openLocation(location, this.pluginManager) - : undefined, + indexType === 'CSI' ? openLocation(loc, this.pluginManager) : undefined, tbiFilehandle: - indexType !== 'CSI' - ? openLocation(location, this.pluginManager) - : undefined, + indexType !== 'CSI' ? openLocation(loc, this.pluginManager) : undefined, chunkCacheSize: 50 * 2 ** 20, renameRefSeqs: (n: string) => n, }) + + return { + gff, + dontRedispatch, + header: await gff.getHeader(), + } } + protected async configurePre2() { + if (!this.configured) { + this.configured = this.configurePre().catch((e: unknown) => { + this.configured = undefined + throw e + }) + } + return this.configured + } + + async configure(opts?: BaseOptions) { + const { statusCallback = () => {} } = opts || {} + return updateStatus('Downloading index', statusCallback, () => + this.configurePre2(), + ) + } public async getRefNames(opts: BaseOptions = {}) { - return this.gff.getReferenceSequenceNames(opts) + const { gff } = await this.configure(opts) + return gff.getReferenceSequenceNames(opts) } - public async getHeader() { - return this.gff.getHeader() + public async getHeader(opts: BaseOptions = {}) { + const { gff } = await this.configure(opts) + return gff.getHeader() } public getFeatures(query: Region, opts: BaseOptions = {}) { return ObservableCreate(async observer => { - const metadata = await this.gff.getMetadata() + const { gff } = await this.configure(opts) + const metadata = await gff.getMetadata() await this.getFeaturesHelper(query, opts, metadata, observer, true) }, opts.stopToken) } @@ -79,16 +95,20 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter { allowRedispatch: boolean, originalQuery = query, ) { + const { statusCallback = () => {} } = opts try { const lines: LineFeature[] = [] - await this.gff.getLines( - query.refName, - query.start, - query.end, - (line, fileOffset) => { - lines.push(this.parseLine(metadata.columnNumbers, line, fileOffset)) - }, + const { dontRedispatch, gff } = await this.configure(opts) + await updateStatus('Downloading features', statusCallback, () => + gff.getLines( + query.refName, + query.start, + query.end, + (line, fileOffset) => { + lines.push(this.parseLine(metadata.columnNumbers, line, fileOffset)) + }, + ), ) if (allowRedispatch && lines.length) { let minStart = Number.POSITIVE_INFINITY @@ -97,7 +117,7 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter { const featureType = line.fields[2]! // only expand redispatch range if feature is not a "dontRedispatch" // type skips large regions like chromosome,region - if (!this.dontRedispatch.includes(featureType)) { + if (!dontRedispatch.includes(featureType)) { const start = line.start - 1 // gff is 1-based if (start < minStart) { minStart = start diff --git a/plugins/variants/src/VcfTabixAdapter/VcfTabixAdapter.ts b/plugins/variants/src/VcfTabixAdapter/VcfTabixAdapter.ts index e7b2c436ed..8064bef847 100644 --- a/plugins/variants/src/VcfTabixAdapter/VcfTabixAdapter.ts +++ b/plugins/variants/src/VcfTabixAdapter/VcfTabixAdapter.ts @@ -17,8 +17,7 @@ export default class VcfTabixAdapter extends BaseFeatureDataAdapter { parser: VcfParser }> - private async configurePre(opts?: BaseOptions) { - const { statusCallback = () => {} } = opts || {} + private async configurePre(_opts?: BaseOptions) { const vcfGzLocation = this.getConf('vcfGzLocation') const location = this.getConf(['index', 'location']) const indexType = this.getConf(['index', 'indexType']) @@ -36,16 +35,15 @@ export default class VcfTabixAdapter extends BaseFeatureDataAdapter { chunkCacheSize: 50 * 2 ** 20, }) - const header = await updateStatus('Downloading index', statusCallback, () => - vcf.getHeader(), - ) return { vcf, - parser: new VcfParser({ header }), + parser: new VcfParser({ + header: await vcf.getHeader(), + }), } } - protected async configure() { + protected async configurePre2() { if (!this.configured) { this.configured = this.configurePre().catch((e: unknown) => { this.configured = undefined @@ -55,26 +53,27 @@ export default class VcfTabixAdapter extends BaseFeatureDataAdapter { return this.configured } + async configure(opts?: BaseOptions) { + const { statusCallback = () => {} } = opts || {} + return updateStatus('Downloading index', statusCallback, () => + this.configurePre2(), + ) + } public async getRefNames(opts: BaseOptions = {}) { - const { vcf } = await this.configure() + const { vcf } = await this.configure(opts) return vcf.getReferenceSequenceNames(opts) } - async getHeader() { - const { vcf } = await this.configure() + async getHeader(opts?: BaseOptions) { + const { vcf } = await this.configure(opts) return vcf.getHeader() } - async getMetadata() { - const { parser } = await this.configure() - return parser.getMetadata() - } - public getFeatures(query: NoAssemblyRegion, opts: BaseOptions = {}) { return ObservableCreate(async observer => { const { refName, start, end } = query const { statusCallback = () => {} } = opts - const { vcf, parser } = await this.configure() + const { vcf, parser } = await this.configure(opts) await updateStatus('Downloading variants', statusCallback, () => vcf.getLines(refName, start, end, { @@ -93,6 +92,7 @@ export default class VcfTabixAdapter extends BaseFeatureDataAdapter { observer.complete() }, opts.stopToken) } + async getSources() { const conf = this.getConf('samplesTsvLocation') if (conf.uri === '' || conf.uri === '/path/to/samples.tsv') { diff --git a/plugins/wiggle/src/BigWigAdapter/BigWigAdapter.ts b/plugins/wiggle/src/BigWigAdapter/BigWigAdapter.ts index c35852b68f..4ef040598c 100644 --- a/plugins/wiggle/src/BigWigAdapter/BigWigAdapter.ts +++ b/plugins/wiggle/src/BigWigAdapter/BigWigAdapter.ts @@ -28,16 +28,18 @@ export default class BigWigAdapter extends BaseFeatureDataAdapter { private async setupPre(opts?: BaseOptions) { const { statusCallback = () => {} } = opts || {} - const pm = this.pluginManager + const pluginManager = this.pluginManager const bigwig = new BigWig({ - filehandle: openLocation(this.getConf('bigWigLocation'), pm), + filehandle: openLocation(this.getConf('bigWigLocation'), pluginManager), }) - const header = await updateStatus( - 'Downloading bigwig header', - statusCallback, - () => bigwig.getHeader(opts), - ) - return { bigwig, header } + return { + bigwig, + header: await updateStatus( + 'Downloading bigwig header', + statusCallback, + () => bigwig.getHeader(opts), + ), + } } async setup(opts?: BaseOptions) { @@ -74,14 +76,18 @@ export default class BigWigAdapter extends BaseFeatureDataAdapter { statusCallback = () => {}, } = opts return ObservableCreate(async observer => { - statusCallback('Downloading bigwig data') const source = this.getConf('source') const resolutionMultiplier = this.getConf('resolutionMultiplier') const { bigwig } = await this.setup(opts) - const feats = await bigwig.getFeatures(refName, start, end, { - ...opts, - basesPerSpan: (bpPerPx / resolution) * resolutionMultiplier, - }) + const feats = await updateStatus( + 'Downloading bigwig data', + statusCallback, + () => + bigwig.getFeatures(refName, start, end, { + ...opts, + basesPerSpan: (bpPerPx / resolution) * resolutionMultiplier, + }), + ) for (const data of feats) { if (source) { @@ -106,7 +112,9 @@ export default class BigWigAdapter extends BaseFeatureDataAdapter { // always render bigwig instead of calculating a feature density for it async getMultiRegionFeatureDensityStats(_regions: Region[]) { - return { featureDensity: 0 } + return { + featureDensity: 0, + } } public freeResources(): void {}