Skip to content

Commit

Permalink
Improve status updating for BEDTabix, GFF3Tabix, VCFTabix, BigBed (#4827)
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin authored Feb 9, 2025
1 parent 36fb12a commit f379670
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 111 deletions.
90 changes: 59 additions & 31 deletions plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import BED from '@gmod/bed'
import { TabixIndexedFile } from '@gmod/tabix'
import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
import { SimpleFeature } from '@jbrowse/core/util'
import { SimpleFeature, updateStatus } from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { checkStopToken } from '@jbrowse/core/util/stopToken'
Expand All @@ -25,6 +25,10 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {

public static capabilities = ['getFeatures', 'getRefNames']

setupP?: Promise<{
meta: Awaited<ReturnType<TabixIndexedFile['getMetadata']>>
}>

public constructor(
config: AnyConfigurationModel,
getSubAdapter?: getSubAdapterType,
Expand Down Expand Up @@ -52,15 +56,37 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {
return this.bed.getReferenceSequenceNames(opts)
}

async getHeader() {
return this.bed.getHeader()
async getHeader(opts?: BaseOptions) {
return this.bed.getHeader(opts)
}

/**
 * Memoizing front-end for getMetadataPre.
 *
 * The promise of the first call is stored on `this.setupP` and handed back
 * to every later caller. If that promise rejects, the stored value is
 * cleared before the error is re-thrown, so the next call starts over.
 *
 * @param _opts - accepted for signature compatibility; not read here
 * @returns the stored promise resolving to `{ meta }`
 */
async getMetadataPre2(_opts?: BaseOptions) {
  if (this.setupP) {
    return this.setupP
  }
  const pending = this.getMetadataPre().catch((error: unknown) => {
    // forget the failed attempt so it is not served from the cache again
    this.setupP = undefined
    throw error
  })
  this.setupP = pending
  return pending
}

/**
 * Uncached metadata fetch: awaits `this.bed.getMetadata()` and wraps the
 * result in an object under the `meta` key.
 */
async getMetadataPre() {
  return { meta: await this.bed.getMetadata() }
}

/**
 * Public metadata accessor. Runs the memoized lookup (getMetadataPre2)
 * through `updateStatus` with the label 'Downloading index'.
 *
 * @param opts - optional adapter options; `statusCallback` falls back to a
 *   no-op when absent
 * @returns whatever `updateStatus` yields for the wrapped lookup
 */
async getMetadata(opts?: BaseOptions) {
  const { statusCallback = () => {} } = opts ?? {}
  const loadIndex = () => this.getMetadataPre2(opts)
  return updateStatus('Downloading index', statusCallback, loadIndex)
}

async getNames() {
if (this.columnNames.length) {
return this.columnNames
}
const header = await this.bed.getHeader()
const header = await this.getHeader()
const defs = header.split(/\n|\r\n|\r/).filter(f => !!f)
const defline = defs.at(-1)
return defline?.includes('\t')
Expand All @@ -71,42 +97,44 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {
: undefined
}

public getFeatures(query: Region, opts: BaseOptions = {}) {
const { stopToken } = opts
public getFeatures(query: Region, opts?: BaseOptions) {
const { stopToken, statusCallback = () => {} } = opts || {}
return ObservableCreate<Feature>(async observer => {
const meta = await this.bed.getMetadata()
const { meta } = await this.getMetadata()
const { columnNumbers } = meta
const colRef = columnNumbers.ref - 1
const colStart = columnNumbers.start - 1
const colEnd = columnNumbers.end - 1
const names = await this.getNames()
let start = performance.now()
checkStopToken(stopToken)
await this.bed.getLines(query.refName, query.start, query.end, {
lineCallback: (line, fileOffset) => {
if (performance.now() - start > 200) {
checkStopToken(stopToken)
start = performance.now()
}
observer.next(
new SimpleFeature(
featureData({
line,
colRef,
colStart,
colEnd,
scoreColumn: this.scoreColumn,
parser: this.parser,
uniqueId: `${this.id}-${fileOffset}`,
names,
}),
),
)
},
stopToken: opts.stopToken,
})
await updateStatus('Downloading features', statusCallback, () =>
this.bed.getLines(query.refName, query.start, query.end, {
lineCallback: (line, fileOffset) => {
if (performance.now() - start > 200) {
checkStopToken(stopToken)
start = performance.now()
}
observer.next(
new SimpleFeature(
featureData({
line,
colRef,
colStart,
colEnd,
scoreColumn: this.scoreColumn,
parser: this.parser,
uniqueId: `${this.id}-${fileOffset}`,
names,
}),
),
)
},
stopToken,
}),
)
observer.complete()
}, opts.stopToken)
}, stopToken)
}

public freeResources(): void {}
Expand Down
32 changes: 21 additions & 11 deletions plugins/bed/src/BigBedAdapter/BigBedAdapter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import { BigBed } from '@gmod/bbi'
import BED from '@gmod/bed'
import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
import { SimpleFeature, doesIntersect2, max, min } from '@jbrowse/core/util'
import {
SimpleFeature,
doesIntersect2,
max,
min,
updateStatus,
} from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { firstValueFrom, toArray } from 'rxjs'
Expand Down Expand Up @@ -95,18 +101,22 @@ export default class BigBedAdapter extends BaseFeatureDataAdapter {
allowRedispatch: boolean
originalQuery?: Region
}) {
const { stopToken } = opts
const { stopToken, statusCallback = () => {} } = opts
const scoreColumn = this.getConf('scoreColumn')
const aggregateField = this.getConf('aggregateField')
const { parser, bigbed } = await this.configure(opts)
const feats = await bigbed.getFeatures(
query.refName,
query.start,
query.end,
{
stopToken,
basesPerSpan: query.end - query.start,
},
const { parser, bigbed } = await updateStatus(
'Downloading header',
statusCallback,
() => this.configure(opts),
)
const feats = await updateStatus(
'Downloading features',
statusCallback,
() =>
bigbed.getFeatures(query.refName, query.start, query.end, {
stopToken,
basesPerSpan: query.end - query.start,
}),
)
if (allowRedispatch && feats.length) {
let minStart = Number.POSITIVE_INFINITY
Expand Down
98 changes: 59 additions & 39 deletions plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { TabixIndexedFile } from '@gmod/tabix'
import { readConfObject } from '@jbrowse/core/configuration'
import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
import { updateStatus } from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'
import { doesIntersect2 } from '@jbrowse/core/util/range'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
Expand All @@ -9,10 +9,7 @@ import { parseStringSync } from 'gff-nostream'

import { featureData } from '../featureData'

import type PluginManager from '@jbrowse/core/PluginManager'
import type { AnyConfigurationModel } from '@jbrowse/core/configuration'
import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter'
import type { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'
import type { Feature } from '@jbrowse/core/util/simpleFeature'
import type { Region } from '@jbrowse/core/util/types'
import type { Observer } from 'rxjs'
Expand All @@ -25,48 +22,67 @@ interface LineFeature {
}

export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
protected gff: TabixIndexedFile

protected dontRedispatch: string[]

public constructor(
config: AnyConfigurationModel,
getSubAdapter?: getSubAdapterType,
pluginManager?: PluginManager,
) {
super(config, getSubAdapter, pluginManager)
const gffGzLocation = readConfObject(config, 'gffGzLocation')
const indexType = readConfObject(config, ['index', 'indexType'])
const location = readConfObject(config, ['index', 'location'])
const dontRedispatch = readConfObject(config, 'dontRedispatch')

this.dontRedispatch = dontRedispatch || ['chromosome', 'contig', 'region']
this.gff = new TabixIndexedFile({
private configured?: Promise<{
gff: TabixIndexedFile
dontRedispatch: string[]
}>

private async configurePre(_opts?: BaseOptions) {
const gffGzLocation = this.getConf('gffGzLocation')
const indexType = this.getConf(['index', 'indexType'])
const loc = this.getConf(['index', 'location'])
const dontRedispatch = this.getConf('dontRedispatch') || [
'chromosome',
'contig',
'region',
]
const gff = new TabixIndexedFile({
filehandle: openLocation(gffGzLocation, this.pluginManager),
csiFilehandle:
indexType === 'CSI'
? openLocation(location, this.pluginManager)
: undefined,
indexType === 'CSI' ? openLocation(loc, this.pluginManager) : undefined,
tbiFilehandle:
indexType !== 'CSI'
? openLocation(location, this.pluginManager)
: undefined,
indexType !== 'CSI' ? openLocation(loc, this.pluginManager) : undefined,
chunkCacheSize: 50 * 2 ** 20,
renameRefSeqs: (n: string) => n,
})

return {
gff,
dontRedispatch,
header: await gff.getHeader(),
}
}

/**
 * Memoizing front-end for configurePre.
 *
 * The first call stores its promise on `this.configured`; later calls get
 * that same promise back. A rejection clears the stored promise before the
 * error propagates, so a subsequent call re-runs configurePre.
 */
protected async configurePre2() {
  if (this.configured) {
    return this.configured
  }
  const pending = this.configurePre().catch((error: unknown) => {
    // do not keep a rejected promise around as the cached configuration
    this.configured = undefined
    throw error
  })
  this.configured = pending
  return pending
}

/**
 * Entry point for obtaining the adapter's configured state. Runs the
 * memoized setup (configurePre2) through `updateStatus` with the label
 * 'Downloading index'.
 *
 * @param opts - optional adapter options; `statusCallback` falls back to a
 *   no-op when absent
 * @returns whatever `updateStatus` yields for the wrapped setup call
 */
async configure(opts?: BaseOptions) {
  const { statusCallback = () => {} } = opts ?? {}
  const runSetup = () => this.configurePre2()
  return updateStatus('Downloading index', statusCallback, runSetup)
}
public async getRefNames(opts: BaseOptions = {}) {
return this.gff.getReferenceSequenceNames(opts)
const { gff } = await this.configure(opts)
return gff.getReferenceSequenceNames(opts)
}

public async getHeader() {
return this.gff.getHeader()
public async getHeader(opts: BaseOptions = {}) {
const { gff } = await this.configure(opts)
return gff.getHeader()
}

public getFeatures(query: Region, opts: BaseOptions = {}) {
return ObservableCreate<Feature>(async observer => {
const metadata = await this.gff.getMetadata()
const { gff } = await this.configure(opts)
const metadata = await gff.getMetadata()
await this.getFeaturesHelper(query, opts, metadata, observer, true)
}, opts.stopToken)
}
Expand All @@ -79,16 +95,20 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
allowRedispatch: boolean,
originalQuery = query,
) {
const { statusCallback = () => {} } = opts
try {
const lines: LineFeature[] = []

await this.gff.getLines(
query.refName,
query.start,
query.end,
(line, fileOffset) => {
lines.push(this.parseLine(metadata.columnNumbers, line, fileOffset))
},
const { dontRedispatch, gff } = await this.configure(opts)
await updateStatus('Downloading features', statusCallback, () =>
gff.getLines(
query.refName,
query.start,
query.end,
(line, fileOffset) => {
lines.push(this.parseLine(metadata.columnNumbers, line, fileOffset))
},
),
)
if (allowRedispatch && lines.length) {
let minStart = Number.POSITIVE_INFINITY
Expand All @@ -97,7 +117,7 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
const featureType = line.fields[2]!
// only expand redispatch range if feature is not a "dontRedispatch"
// type skips large regions like chromosome,region
if (!this.dontRedispatch.includes(featureType)) {
if (!dontRedispatch.includes(featureType)) {
const start = line.start - 1 // gff is 1-based
if (start < minStart) {
minStart = start
Expand Down
Loading

0 comments on commit f379670

Please sign in to comment.