diff --git a/CHANGELOG.md b/CHANGELOG.md index 817e76c09..f1855f581 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ ### Changed +- Load corpus timespan data in parallel when loading app [#437](https://github.com/spraakbanken/korp-frontend/issues/437) + - Instead of `settings.time_data`, use `import { timeData } from "./timedata"` + - Await `getTimeData()` before using `timeData` or `corpus.time`/`corpus.non_time`. The function is memoized, so repeated calls will not affect performance - More space in word picture tables [#102](https://github.com/spraakbanken/korp-frontend/issues/102) ### Fixed diff --git a/app/scripts/backend/time-proxy.ts b/app/scripts/backend/time-proxy.ts deleted file mode 100644 index de7dd40bb..000000000 --- a/app/scripts/backend/time-proxy.ts +++ /dev/null @@ -1,70 +0,0 @@ -/** @format */ -import _ from "lodash" -import settings from "@/settings" -import BaseProxy from "@/backend/base-proxy" -import type { Histogram } from "@/backend/types" -import { Factory } from "@/util" -import { TimespanParams } from "./types/timespan" -import { korpRequest } from "./common" - -/** Data returned after slight mangling. */ -type TimeData = [ - Record, // Same as KorpTimespanResponse.corpora - [number, number][], // Tokens per time period, as pairs ordered by time period - number // Tokens in undated material -] - -export class TimeProxy extends BaseProxy { - async makeRequest(): Promise { - const params: TimespanParams = { - granularity: "y", - corpus: settings.corpusListing.stringifyAll(), - } - - const data = await korpRequest("timespan", params) - - const rest = data.combined[""] || 0 - delete data.combined[""] - - this.expandTimeStruct(data.combined) - const combined = this.compilePlotArray(data.combined) - - return [data.corpora, combined, rest] - } - - compilePlotArray(dataStruct: Histogram) { - let output: [number, number][] = [] - $.each(dataStruct, function (key, val) { - if (!key || !val) return - return output.push([parseInt(key), val]) - }) - - output = output.sort((a, b) => a[0] - b[0]) - return output - } - - /** Add each missing year with the previous year's value */ - expandTimeStruct(struct: Histogram): void { - const years = Object.keys(struct) - .filter((key) => key !== "") - .map(Number) - if (!years.length) return - - const minYear = Math.min(...years) - const maxYear = Math.max(...years) - - if (_.isNaN(maxYear) || _.isNaN(minYear)) { - console.log("expandTimestruct broken, years:", years) - return - } - - let prevCount = struct[`${minYear}`] - for (const year of _.range(minYear, maxYear)) { - if (struct[`${year}`] == undefined) struct[`${year}`] = prevCount - else prevCount = struct[`${year}`] - } - } -} - -const timeProxyFactory = new Factory(TimeProxy) -export default timeProxyFactory diff --git a/app/scripts/components/corpus-chooser/corpus-chooser.ts b/app/scripts/components/corpus-chooser/corpus-chooser.ts index 2cb911ddf..198b35a30 100644 --- a/app/scripts/components/corpus-chooser/corpus-chooser.ts +++ b/app/scripts/components/corpus-chooser/corpus-chooser.ts @@ -21,6 +21,7 @@ import { RootScope } from "@/root-scope.types" import { LocationService } from "@/urlparams" import { CorpusTransformed } from "@/settings/config-transformed.types" import { LangString } from "@/i18n/types" +import { getTimeData } from "@/timedata" type CorpusChooserController = IController & { credentials: string[] @@ -156,7 +157,11 @@ angular.module("korpApp").component("corpusChooser", { $ctrl.initialized = false $ctrl.showChooser = false - $ctrl.showTimeGraph = !!settings.has_timespan && !!settings.time_data[0].length + + // Load time data before showing time graph + getTimeData().then((data) => { + $ctrl.showTimeGraph = Boolean(data && data[0].length) + }) $ctrl.onShowChooser = () => { // don't open the chooser unless the info-call is done diff --git a/app/scripts/components/extended/cqp-term.ts b/app/scripts/components/extended/cqp-term.ts index 3897bdcb3..2d49f39b5 100644 --- a/app/scripts/components/extended/cqp-term.ts +++ b/app/scripts/components/extended/cqp-term.ts @@ -9,6 +9,7 @@ import { LocationService } from "@/urlparams" import { RootScope } from "@/root-scope.types" import { Condition, OperatorKorp } from "@/cqp_parser/cqp.types" import { AttributeOption } from "@/corpus_listing" +import { getTimeData } from "@/timedata" /** * TODO @@ -81,13 +82,15 @@ angular.module("korpApp").component("extendedCqpTerm", { ctrl.valfilter = valfilter ctrl.$onInit = () => { - $rootScope.$on("corpuschooserchange", () => $timeout(onCorpusChange)) + $rootScope.$on("corpuschooserchange", () => updateAttributes()) $rootScope.$watch( () => $location.search().parallel_corpora, - () => $timeout(onCorpusChange) + () => updateAttributes() ) + // React on the date interval attribute becoming available + getTimeData().then(() => updateAttributes()) - onCorpusChange() + updateAttributes() } ctrl.localChange = (term) => { @@ -95,24 +98,30 @@ angular.module("korpApp").component("extendedCqpTerm", { ctrl.change() } - function onCorpusChange() { + /** Update list of available attributes */ + async function updateAttributes() { // TODO: respect the setting 'wordAttributeSelector' and similar if (!settings.corpusListing.selected.length) return - // Get available attribute options - ctrl.types = settings.corpusListing - .getAttributeGroups("union", ctrl.parallellLang) - .filter((item) => !item["hide_extended"]) + // The date interval attribute is not available until time data is ready + if (ctrl.term.type == "date_interval") await getTimeData() - // Map attribute options by name. Prefix with `_.` for struct attrs for use in CQP. - ctrl.typeMapping = _.fromPairs( - ctrl.types.map((item) => [item["is_struct_attr"] ? `_.${item.value}` : item.value, item]) - ) + $timeout(() => { + // Get available attribute options + ctrl.types = settings.corpusListing + .getAttributeGroups("union", ctrl.parallellLang) + .filter((item) => !item["hide_extended"]) - // Reset attribute if the selected one is no longer available - if (!ctrl.typeMapping[ctrl.term.type]) ctrl.term.type = ctrl.types[0].value + // Map attribute options by name. Prefix with `_.` for struct attrs for use in CQP. + ctrl.typeMapping = _.fromPairs( + ctrl.types.map((item) => [item["is_struct_attr"] ? `_.${item.value}` : item.value, item]) + ) - ctrl.opts = getOpts() + // Reset attribute if the selected one is no longer available + if (!ctrl.typeMapping[ctrl.term.type]) ctrl.term.type = ctrl.types[0].value + + ctrl.opts = getOpts() + }) } const getOpts = () => getOptsMemo(ctrl.term.type) diff --git a/app/scripts/data_init.ts b/app/scripts/data_init.ts index 141ac813c..e684da589 100644 --- a/app/scripts/data_init.ts +++ b/app/scripts/data_init.ts @@ -3,7 +3,6 @@ import _ from "lodash" import memoize from "lodash/memoize" import settings, { setDefaultConfigValues } from "@/settings" import currentMode from "@/mode" -import timeProxyFactory from "@/backend/time-proxy" import { getAllCorporaInFolders } from "./components/corpus-chooser/util" import { CorpusListing } from "./corpus_listing" import { ParallelCorpusListing } from "./parallel/corpus_listing" @@ -59,28 +58,6 @@ async function getInfoData(corpusIds: string[]): Promise { })) } -/** Fetch and process time data for all corpora in the mode. */ -async function getTimeData(): Promise<[[number, number][], number]> { - const timeProxy = timeProxyFactory.create() - const [dataByCorpus, combined, rest] = await timeProxy.makeRequest() - - if (combined.length == 0) return [[], 0] - - // this adds data to the corpora in settings - for (const [id, struct] of Object.entries(dataByCorpus)) { - const corpus = settings.corpora[id.toLowerCase()] - timeProxy.expandTimeStruct(struct) - corpus.non_time = struct[""] - corpus.time = _.omit(struct, "") - // Enable the special date interval search attribute for corpora that have some timestamped data - if (Object.keys(corpus.time).length > 1) { - corpus.common_attributes ??= {} - corpus.common_attributes.date_interval = true - } - } - return [combined, rest] -} - async function getConfig(): Promise { // Load static corpus config if it exists. try { @@ -239,12 +216,7 @@ export async function fetchInitialData(authDef: Promise) { ) } - // if the previous config calls didn't yield any corpora, don't ask for time if (!_.isEmpty(settings.corpora)) { setInitialCorpora() - - if (settings.has_timespan) { - settings.time_data = await getTimeData() - } } } diff --git a/app/scripts/settings/settings.types.ts b/app/scripts/settings/settings.types.ts index 30016aad9..a97cf9af5 100644 --- a/app/scripts/settings/settings.types.ts +++ b/app/scripts/settings/settings.types.ts @@ -9,11 +9,6 @@ import { ConfigTransformed } from "./config-transformed.types" export type Settings = AppSettings & ConfigTransformed & { - // Populated in data_init.js fetchInitialData() using the `/timespan` API - time_data: [ - [number, number][], // Token count per year - number // Undated tokens - ] // Set in data_init.js fetchInitialData() corpusListing: CorpusListing } diff --git a/app/scripts/timedata.ts b/app/scripts/timedata.ts new file mode 100644 index 000000000..3a823e9a5 --- /dev/null +++ b/app/scripts/timedata.ts @@ -0,0 +1,80 @@ +/** @format */ +import { isNaN, memoize, omit, range } from "lodash" +import settings from "@/settings" +import { Histogram } from "./backend/types" +import { korpRequest } from "./backend/common" + +/** + * Time data, if available. + * + * This gets set in `getTimeData()`, so make sure to await that before using this. + */ +export let timeData: [[number, number][], number] | undefined + +/** Fetch and process time data for all corpora in the mode. */ +export const getTimeData: () => Promise<[[number, number][], number] | undefined> = memoize(async () => { + if (!settings.has_timespan) return undefined + + const data = await korpRequest("timespan", { + granularity: "y", + corpus: settings.corpusListing.stringifyAll(), + }) + + const rest = data.combined[""] || 0 + delete data.combined[""] + + expandTimeStruct(data.combined) + + // Re-structure the combined counts as year-count tuples for plotting + const combined: [number, number][] = Object.entries(data.combined) + .filter(([key, val]) => key && val) + .map(([key, val]) => [parseInt(key), val]) + combined.sort((a, b) => a[0] - b[0]) + if (combined.length == 0) return [[], 0] + + addToCorpora(data.corpora) + + // Store time data for non-async use + timeData = [combined, rest] + return timeData +}) + +/** Add each missing year with the previous year's value */ +function expandTimeStruct(struct: Histogram): void { + const years = Object.keys(struct) + .filter((key) => key !== "") + .map(Number) + if (!years.length) return + + const minYear = Math.min(...years) + const maxYear = Math.max(...years) + + if (isNaN(maxYear) || isNaN(minYear)) { + console.log("expandTimestruct broken, years:", years) + return + } + + let prevCount = struct[`${minYear}`] + for (const year of range(minYear, maxYear)) { + if (struct[`${year}`] == undefined) struct[`${year}`] = prevCount + else prevCount = struct[`${year}`] + } +} + +/** Add time data to corpora */ +function addToCorpora(dataByCorpus: Record) { + for (const [id, struct] of Object.entries(dataByCorpus)) { + const corpus = settings.corpora[id.toLowerCase()] + expandTimeStruct(struct) + corpus.non_time = struct[""] + corpus.time = omit(struct, "") + // Enable the special date interval search attribute for corpora that have some timestamped data + if (Object.keys(corpus.time).length > 1) { + corpus.common_attributes ??= {} + corpus.common_attributes.date_interval = true + } + } + + // Update list of common attributes + settings.corpusListing.updateAttributes() +} diff --git a/app/scripts/timeseries.ts b/app/scripts/timeseries.ts index c44456c8a..6a026c127 100644 --- a/app/scripts/timeseries.ts +++ b/app/scripts/timeseries.ts @@ -5,6 +5,7 @@ import fromPairs from "lodash/fromPairs" import pickBy from "lodash/pickBy" import range from "lodash/range" import settings from "@/settings" +import { timeData } from "./timedata" /** * Find some even points within a range of years. @@ -20,12 +21,11 @@ export function calculateYearTicks(min: number, max: number) { return range(round(min), round(max + 1), step) } -// Time data is fetched in data_init.js, to also provide data for search result trend diagram (?) /** Data size per year of all corpora. */ -export const getTimeDataPairs = (): [number, number][] => settings.time_data[0] +export const getTimeDataPairs = (): [number, number][] => timeData![0] /** Data size of unknown year in all corpora. */ -export const getCountUndated = (): number => settings.time_data[1] +export const getCountUndated = (): number => timeData![1] /** Get data size per year of all corpora. */ export const getSeries = () => fromPairs(getTimeDataPairs()) as YearSeries