Skip to content

Commit

Permalink
perf: load timespan async
Browse files Browse the repository at this point in the history
Fixes #437
  • Loading branch information
arildm committed Mar 3, 2025
1 parent c987ec9 commit 83a0807
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 122 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

### Changed

- Load corpus timespan data in parallel when loading app [#437](https://github.com/spraakbanken/korp-frontend/issues/437)
- Instead of `settings.time_data`, use `import { timeData } from "./timedata"`
- Await `getTimeData()` before using `timeData` or `corpus.time`/`corpus.non_time`. The function is memoized, so repeated calls will not affect performance
- More space in word picture tables [#102](https://github.com/spraakbanken/korp-frontend/issues/102)

### Fixed
Expand Down
70 changes: 0 additions & 70 deletions app/scripts/backend/time-proxy.ts

This file was deleted.

7 changes: 6 additions & 1 deletion app/scripts/components/corpus-chooser/corpus-chooser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { RootScope } from "@/root-scope.types"
import { LocationService } from "@/urlparams"
import { CorpusTransformed } from "@/settings/config-transformed.types"
import { LangString } from "@/i18n/types"
import { getTimeData } from "@/timedata"

type CorpusChooserController = IController & {
credentials: string[]
Expand Down Expand Up @@ -156,7 +157,11 @@ angular.module("korpApp").component("corpusChooser", {

$ctrl.initialized = false
$ctrl.showChooser = false
$ctrl.showTimeGraph = !!settings.has_timespan && !!settings.time_data[0].length

// Load time data before showing time graph
getTimeData().then((data) => {
$ctrl.showTimeGraph = Boolean(data && data[0].length)
})

$ctrl.onShowChooser = () => {
// don't open the chooser unless the info-call is done
Expand Down
39 changes: 24 additions & 15 deletions app/scripts/components/extended/cqp-term.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { LocationService } from "@/urlparams"
import { RootScope } from "@/root-scope.types"
import { Condition, OperatorKorp } from "@/cqp_parser/cqp.types"
import { AttributeOption } from "@/corpus_listing"
import { getTimeData } from "@/timedata"

/**
* TODO
Expand Down Expand Up @@ -81,38 +82,46 @@ angular.module("korpApp").component("extendedCqpTerm", {
ctrl.valfilter = valfilter

ctrl.$onInit = () => {
$rootScope.$on("corpuschooserchange", () => $timeout(onCorpusChange))
$rootScope.$on("corpuschooserchange", () => updateAttributes())
$rootScope.$watch(
() => $location.search().parallel_corpora,
() => $timeout(onCorpusChange)
() => updateAttributes()
)
// React on the date interval attribute becoming available
getTimeData().then(() => updateAttributes())

onCorpusChange()
updateAttributes()
}

ctrl.localChange = (term) => {
Object.assign(ctrl.term, term)
ctrl.change()
}

function onCorpusChange() {
/** Update list of available attributes */
async function updateAttributes() {
// TODO: respect the setting 'wordAttributeSelector' and similar
if (!settings.corpusListing.selected.length) return

// Get available attribute options
ctrl.types = settings.corpusListing
.getAttributeGroups("union", ctrl.parallellLang)
.filter((item) => !item["hide_extended"])
// The date interval attribute is not available until time data is ready
if (ctrl.term.type == "date_interval") await getTimeData()

// Map attribute options by name. Prefix with `_.` for struct attrs for use in CQP.
ctrl.typeMapping = _.fromPairs(
ctrl.types.map((item) => [item["is_struct_attr"] ? `_.${item.value}` : item.value, item])
)
$timeout(() => {
// Get available attribute options
ctrl.types = settings.corpusListing
.getAttributeGroups("union", ctrl.parallellLang)
.filter((item) => !item["hide_extended"])

// Reset attribute if the selected one is no longer available
if (!ctrl.typeMapping[ctrl.term.type]) ctrl.term.type = ctrl.types[0].value
// Map attribute options by name. Prefix with `_.` for struct attrs for use in CQP.
ctrl.typeMapping = _.fromPairs(
ctrl.types.map((item) => [item["is_struct_attr"] ? `_.${item.value}` : item.value, item])
)

ctrl.opts = getOpts()
// Reset attribute if the selected one is no longer available
if (!ctrl.typeMapping[ctrl.term.type]) ctrl.term.type = ctrl.types[0].value

ctrl.opts = getOpts()
})
}

const getOpts = () => getOptsMemo(ctrl.term.type)
Expand Down
28 changes: 0 additions & 28 deletions app/scripts/data_init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import _ from "lodash"
import memoize from "lodash/memoize"
import settings, { setDefaultConfigValues } from "@/settings"
import currentMode from "@/mode"
import timeProxyFactory from "@/backend/time-proxy"
import { getAllCorporaInFolders } from "./components/corpus-chooser/util"
import { CorpusListing } from "./corpus_listing"
import { ParallelCorpusListing } from "./parallel/corpus_listing"
Expand Down Expand Up @@ -59,28 +58,6 @@ async function getInfoData(corpusIds: string[]): Promise<InfoData> {
}))
}

/**
 * Fetch and process time data for all corpora in the mode.
 *
 * Resolves to a `[combined, rest]` tuple as delivered by the time proxy request.
 * As a side effect, each corpus in `settings.corpora` that appears in the response gets its
 * `time` and `non_time` properties set.
 */
async function getTimeData(): Promise<[[number, number][], number]> {
const timeProxy = timeProxyFactory.create()
// The request yields three parts: per-corpus data, combined data and a remainder value
const [dataByCorpus, combined, rest] = await timeProxy.makeRequest()

// Without any combined data, return an empty result and leave the corpora untouched
if (combined.length == 0) return [[], 0]

// this adds data to the corpora in settings
for (const [id, struct] of Object.entries(dataByCorpus)) {
// Corpora in settings are looked up by lower-cased id
const corpus = settings.corpora[id.toLowerCase()]
// NOTE(review): presumably fills in gaps between years in place — confirm in time-proxy
timeProxy.expandTimeStruct(struct)
// The value under the empty key is stored separately from the remaining entries
corpus.non_time = struct[""]
corpus.time = _.omit(struct, "")
// Enable the special date interval search attribute for corpora that have some timestamped data
if (Object.keys(corpus.time).length > 1) {
corpus.common_attributes ??= {}
corpus.common_attributes.date_interval = true
}
}
return [combined, rest]
}

async function getConfig(): Promise<Config> {
// Load static corpus config if it exists.
try {
Expand Down Expand Up @@ -239,12 +216,7 @@ export async function fetchInitialData(authDef: Promise<boolean>) {
)
}

// if the previous config calls didn't yield any corpora, don't ask for time
if (!_.isEmpty(settings.corpora)) {
setInitialCorpora()

if (settings.has_timespan) {
settings.time_data = await getTimeData()
}
}
}
5 changes: 0 additions & 5 deletions app/scripts/settings/settings.types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@ import { ConfigTransformed } from "./config-transformed.types"

export type Settings = AppSettings &
ConfigTransformed & {
// Populated in data_init.js fetchInitialData() using the `/timespan` API
time_data: [
[number, number][], // Token count per year
number // Undated tokens
]
// Set in data_init.js fetchInitialData()
corpusListing: CorpusListing
}
80 changes: 80 additions & 0 deletions app/scripts/timedata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/** @format */
import { isNaN, memoize, omit, range } from "lodash"
import settings from "@/settings"
import { Histogram } from "./backend/types"
import { korpRequest } from "./backend/common"

/**
 * Time data, if available: a tuple of year–count pairs and a separate single count.
 *
 * This is `undefined` initially and gets assigned inside `getTimeData()`, so make sure to
 * await that function before reading this. It stays `undefined` when `getTimeData()`
 * resolves to `undefined`.
 */
export let timeData: [[number, number][], number] | undefined

/**
 * Fetch and process time data for all corpora in the mode.
 *
 * Resolves to `undefined` when `settings.has_timespan` is falsy. Otherwise resolves to a
 * `[combined, rest]` tuple, where `combined` is a list of `[year, count]` pairs sorted by year
 * and `rest` is the count stored under the empty key of the combined histogram.
 * Whenever a tuple is returned, the same tuple is also stored in the module-level `timeData`
 * for non-async use.
 *
 * The function is memoized, so the backend request is issued at most once and repeated calls
 * share the same promise.
 */
export const getTimeData: () => Promise<[[number, number][], number] | undefined> = memoize(async () => {
    if (!settings.has_timespan) return undefined

    const data = await korpRequest("timespan", {
        granularity: "y",
        corpus: settings.corpusListing.stringifyAll(),
    })

    // The empty key holds the count that is not tied to any year; keep it apart from the series
    const rest = data.combined[""] || 0
    delete data.combined[""]

    expandTimeStruct(data.combined)

    // Re-structure the combined counts as year-count tuples for plotting
    const combined: [number, number][] = Object.entries(data.combined)
        .filter(([key, val]) => key && val)
        .map(([key, val]) => [parseInt(key, 10), val])
    combined.sort((a, b) => a[0] - b[0])

    if (combined.length == 0) {
        // Store the empty result as well, so that synchronous readers of `timeData`
        // see the same value as awaiting callers instead of `undefined`
        timeData = [[], 0]
        return timeData
    }

    addToCorpora(data.corpora)

    // Store time data for non-async use
    timeData = [combined, rest]
    return timeData
})

/**
 * Fill the gaps in a year histogram, in place.
 *
 * Every year between the lowest and the highest year key that has no value gets the value of
 * the closest preceding year that has one. The empty key is ignored. Nothing is changed if
 * there are no year keys, or if some key cannot be read as a number.
 */
function expandTimeStruct(struct: Histogram): void {
    const yearKeys: number[] = []
    for (const key of Object.keys(struct)) {
        if (key !== "") yearKeys.push(Number(key))
    }
    if (yearKeys.length === 0) return

    const firstYear = Math.min(...yearKeys)
    const lastYear = Math.max(...yearKeys)

    // A non-numeric key turns both bounds into NaN, which makes the range meaningless
    const boundsAreBroken = isNaN(lastYear) || isNaN(firstYear)
    if (boundsAreBroken) {
        console.log("expandTimestruct broken, years:", yearKeys)
        return
    }

    // Walk the years in order, carrying the latest known count forward into the gaps
    let carriedCount = struct[`${firstYear}`]
    range(firstYear, lastYear).forEach((year) => {
        const key = `${year}`
        if (struct[key] == undefined) {
            struct[key] = carriedCount
        } else {
            carriedCount = struct[key]
        }
    })
}

/**
 * Attach per-corpus time data to the corpora in `settings`.
 *
 * For each entry, the histogram is gap-filled with `expandTimeStruct`, the count under the
 * empty key is stored as `non_time` and the remaining entries as `time` on the matching corpus.
 * Afterwards the corpus listing is asked to update its attributes.
 */
function addToCorpora(dataByCorpus: Record<string, Histogram>) {
    Object.entries(dataByCorpus).forEach(([corpusId, histogram]) => {
        // Corpora in settings are keyed by lower-cased id
        const target = settings.corpora[corpusId.toLowerCase()]
        expandTimeStruct(histogram)

        target.non_time = histogram[""]
        target.time = omit(histogram, "")

        // Enable the special date interval search attribute for corpora that have some timestamped data
        const datedEntryCount = Object.keys(target.time).length
        if (datedEntryCount > 1) {
            target.common_attributes ??= {}
            target.common_attributes.date_interval = true
        }
    })

    // Update list of common attributes
    settings.corpusListing.updateAttributes()
}
6 changes: 3 additions & 3 deletions app/scripts/timeseries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import fromPairs from "lodash/fromPairs"
import pickBy from "lodash/pickBy"
import range from "lodash/range"
import settings from "@/settings"
import { timeData } from "./timedata"

/**
* Find some even points within a range of years.
Expand All @@ -20,12 +21,11 @@ export function calculateYearTicks(min: number, max: number) {
return range(round(min), round(max + 1), step)
}

// Time data is fetched in data_init.js, to also provide data for search result trend diagram (?)
/** Data size per year of all corpora. */
export const getTimeDataPairs = (): [number, number][] => settings.time_data[0]
export const getTimeDataPairs = (): [number, number][] => timeData![0]

/** Data size of unknown year in all corpora. */
export const getCountUndated = (): number => settings.time_data[1]
export const getCountUndated = (): number => timeData![1]

/** Get data size per year of all corpora. */
export const getSeries = () => fromPairs(getTimeDataPairs()) as YearSeries
Expand Down

0 comments on commit 83a0807

Please sign in to comment.