Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change ingestion input to JSON lines format #639

Merged
merged 8 commits into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ export const ML_RESPONSE_PROCESSOR_EXAMPLE_DOCS_LINK =
'https://opensearch.org/docs/latest/search-plugins/search-pipelines/ml-inference-search-response/#example-externally-hosted-text-embedding-model';
export const UPDATE_MODEL_DOCS_LINK =
'https://opensearch.org/docs/latest/ml-commons-plugin/api/model-apis/update-model/';
// External documentation link for the JSON Lines format.
export const JSONLINES_LINK = 'https://jsonlines.org/';

/**
* Text chunking algorithm constants
Expand Down
1 change: 1 addition & 0 deletions common/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export type ConfigFieldType =
| 'json'
| 'jsonArray'
| 'jsonString'
| 'jsonLines'
| 'select'
| 'model'
| 'map'
Expand Down
4 changes: 4 additions & 0 deletions common/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ export function customStringify(jsonObj: {} | []): string {
return JSON.stringify(jsonObj, undefined, 2);
}

/**
 * Serializes a JSON object onto a single line with no added whitespace.
 *
 * @param jsonObj the object to serialize
 * @returns the compact, single-line JSON string
 */
export function customStringifySingleLine(jsonObj: {}): string {
  // With no replacer and no indentation, JSON.stringify emits compact output.
  const compact = JSON.stringify(jsonObj);
  return compact;
}

export function isVectorSearchUseCase(workflow: Workflow | undefined): boolean {
return (
workflow?.ui_metadata?.type !== undefined &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ export function SourceData(props: SourceDataProps) {
// empty/populated docs state
let docs = [];
try {
docs = JSON.parse(getIn(values, 'ingest.docs', []));
const lines = getIn(values, 'ingest.docs', '').split('\n') as string[];
lines.forEach((line) => docs.push(JSON.parse(line)));
} catch {}
const docsPopulated = docs.length > 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,17 @@ import {
EuiSmallButtonEmpty,
EuiButtonGroup,
EuiCompressedComboBox,
EuiLink,
} from '@elastic/eui';
import { JsonField } from '../input_fields';
import { JsonLinesField } from '../input_fields';
import {
customStringify,
customStringifySingleLine,
FETCH_ALL_QUERY_LARGE,
IConfigField,
IndexMappings,
IngestDocsFormValues,
isVectorSearchUseCase,
JSONLINES_LINK,
MAX_BYTES_FORMATTED,
MAX_DOCS_TO_IMPORT,
SearchHit,
Expand Down Expand Up @@ -72,11 +74,11 @@ export function SourceDataModal(props: SourceDataProps) {

// sub-form values/schema
const docsFormValues = {
docs: getInitialValue('jsonArray'),
docs: getInitialValue('jsonLines'),
} as IngestDocsFormValues;
const docsFormSchema = yup.object({
docs: getFieldSchema({
type: 'jsonArray',
type: 'jsonLines',
} as IConfigField),
}) as yup.Schema;

Expand Down Expand Up @@ -177,8 +179,14 @@ export function SourceDataModal(props: SourceDataProps) {
.then((resp) => {
const docObjs = resp?.hits?.hits
?.slice(0, MAX_DOCS_TO_IMPORT)
?.map((hit: SearchHit) => hit?._source);
formikProps.setFieldValue('docs', customStringify(docObjs));
?.map((hit: SearchHit) => hit?._source) as {}[];
let jsonLinesStr = '';
try {
docObjs.forEach((docObj) => {
jsonLinesStr += customStringifySingleLine(docObj) + '\n';
});
} catch {}
formikProps.setFieldValue('docs', jsonLinesStr);
});
}
}, [selectedIndex]);
Expand Down Expand Up @@ -234,7 +242,7 @@ export function SourceDataModal(props: SourceDataProps) {
{props.selectedOption === SOURCE_OPTIONS.UPLOAD && (
<>
<EuiCompressedFilePicker
accept="application/json"
accept=".jsonl"
multiple={false}
initialPromptText="Upload file"
onChange={(files) => {
Expand All @@ -247,6 +255,7 @@ export function SourceDataModal(props: SourceDataProps) {
'docs',
e.target.result as string
);
formikProps.setFieldTouched('docs');
}
};
fileReader.readAsText(files[0]);
Expand Down Expand Up @@ -286,12 +295,20 @@ export function SourceDataModal(props: SourceDataProps) {
<EuiSpacer size="xs" />
</>
)}
<JsonField
<JsonLinesField
label="Documents to be imported"
fieldPath={'docs'}
helpText="Documents must be in a JSON array format."
helpText={
<EuiText size="s">
Documents must be in JSON lines format.{' '}
<EuiLink href={JSONLINES_LINK} target="_blank">
Learn more
</EuiLink>
</EuiText>
}
editorHeight="40vh"
readOnly={false}
validate={true}
/>
</>
</EuiModalBody>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

export { TextField } from './text_field';
export { JsonField } from './json_field';
export { JsonLinesField } from './json_lines_field';
export { ModelField } from './model_field';
export { MapField } from './map_field';
export { MapArrayField } from './map_array_field';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ interface JsonFieldProps {
* in some custom JSON
*/
export function JsonField(props: JsonFieldProps) {
const validate = props.validate !== undefined ? props.validate : true;
const validate = props.validate ?? true;

const { errors, touched, values } = useFormikContext<WorkflowFormValues>();

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

import React, { ReactNode, useEffect, useState } from 'react';
import { Field, FieldProps, getIn, useFormikContext } from 'formik';
import { isEmpty } from 'lodash';
import {
EuiCodeEditor,
EuiCompressedFormRow,
EuiLink,
EuiText,
} from '@elastic/eui';
import {
customStringifySingleLine,
WorkflowFormValues,
} from '../../../../../common';
import { camelCaseToTitleString } from '../../../../utils';

// Props accepted by the JsonLinesField component.
interface JsonLinesFieldProps {
fieldPath: string; // the full path in string-form to the field (e.g., 'ingest.enrich.processors.text_embedding_processor.inputField')
// whether to surface validation state/errors on the form row; treated as true when omitted
validate?: boolean;
// label for the form row; when omitted, a title-cased form of the field name is used
label?: string;
// when set, a "Learn more" link to this URL is appended next to the label
helpLink?: string;
// help content passed through to the form row
helpText?: string | ReactNode;
// CSS height of the code editor; '15vh' is used when omitted
editorHeight?: string;
// makes the editor read-only and turns off its line/word/gutter highlighting; false when omitted
readOnly?: boolean;
}

/**
 * An input field for a component where users input data in JSON Lines format.
 * https://jsonlines.org/
 *
 * The editor keeps its own draft text so that typing is never reformatted
 * mid-edit; every change is still mirrored into the Formik field. When the
 * editor loses focus the draft is parsed line by line: if every non-blank
 * line is valid JSON, the field value is rewritten in compact
 * one-value-per-line form; otherwise the per-line parse errors are shown
 * beneath the editor and the field value is left as typed.
 */
export function JsonLinesField(props: JsonLinesFieldProps) {
  const validate = props.validate ?? true;

  const { errors, touched, values } = useFormikContext<WorkflowFormValues>();

  // temp input state. only format when users click out of the code editor
  const [jsonStr, setJsonStr] = useState<string>('{}');
  const [customErrMsg, setCustomErrMsg] = useState<string | undefined>(
    undefined
  );

  // initializing the text to be the stringified form value
  useEffect(() => {
    if (props.fieldPath && values) {
      const formValue = getIn(values, props.fieldPath) as string;
      if (formValue) {
        setJsonStr(formValue);
      }
    }
  }, [props.fieldPath, values]);

  return (
    <Field name={props.fieldPath}>
      {({ field, form }: FieldProps) => {
        return (
          <EuiCompressedFormRow
            fullWidth={true}
            key={props.fieldPath}
            label={props.label || camelCaseToTitleString(field.name)}
            labelAppend={
              props.helpLink ? (
                <EuiText size="xs">
                  <EuiLink href={props.helpLink} target="_blank">
                    Learn more
                  </EuiLink>
                </EuiText>
              ) : undefined
            }
            helpText={props.helpText || undefined}
            error={
              validate ? (
                <>
                  {customErrMsg?.split('\n')?.map((errMsg, idx) => {
                    return (
                      <EuiText key={idx} color="danger" size="s">
                        {errMsg}
                      </EuiText>
                    );
                  })}
                </>
              ) : undefined
            }
            isInvalid={
              validate
                ? getIn(errors, field.name) && getIn(touched, field.name)
                : false
            }
          >
            <EuiCodeEditor
              mode="hjson"
              theme="textmate"
              width="100%"
              height={props.editorHeight || '15vh'}
              value={jsonStr}
              onChange={(input) => {
                // keep the draft and the form value in sync while typing;
                // stale parse errors are cleared until the next blur
                setJsonStr(input);
                form.setFieldValue(field.name, input);
                setCustomErrMsg(undefined);
              }}
              onBlur={() => {
                form.setFieldTouched(field.name);
                const { formatted, errs } = normalizeJsonLines(jsonStr);
                if (errs.length > 0) {
                  // leave the user's text untouched and report what failed
                  setCustomErrMsg(getFormattedErrorMsgList(errs));
                } else {
                  form.setFieldValue(field.name, formatted);
                  setCustomErrMsg(undefined);
                }
              }}
              readOnly={props.readOnly || false}
              setOptions={{
                fontSize: '14px',
                useWorker: false,
                highlightActiveLine: !props.readOnly,
                highlightSelectedWord: !props.readOnly,
                highlightGutterLine: !props.readOnly,
                wrap: true,
              }}
              aria-label="Code Editor"
            />
          </EuiCompressedFormRow>
        );
      }}
    </Field>
  );
}

// Parse each non-blank line of the input as JSON. Returns the compact
// one-value-per-line form of every non-empty parsed value, plus one formatted
// error message (with its 1-based line number) per line that failed to parse.
function normalizeJsonLines(
  input: string
): { formatted: string; errs: string[] } {
  const errs: string[] = [];
  const formattedLines: string[] = [];
  input.split('\n').forEach((line, idx) => {
    if (line.trim() === '') {
      return;
    }
    let parsedLine = {};
    try {
      parsedLine = JSON.parse(line);
    } catch (error) {
      errs.push(getFormattedErrorMsg(error as Error, idx + 1));
    }
    // a failed parse leaves the empty default in place, so the line is skipped
    if (!isEmpty(parsedLine)) {
      // reuse the already-parsed value instead of parsing the line again
      formattedLines.push(customStringifySingleLine(parsedLine));
    }
  });
  return { formatted: formattedLines.join('\n'), errs };
}

// Parse out the useful information from an error triggered during JSON parsing failure.
// Produces `Error on line <idx>: <reason>`, where <reason> is the error's message with
// any trailing " in JSON ..." / " after JSON ..." position details removed. Falls back
// to "Invalid JSON" when the thrown value carries no string message.
function getFormattedErrorMsg(error: Error, idx: number): string {
  // a plain guarded property read: anything without a string message
  // gets the fallback instead of failing on .replace
  const rawMsg =
    typeof error?.message === 'string' ? error.message : 'Invalid JSON';
  const conciseMsg = rawMsg
    .replace(/^(.*?)\s+in JSON.*/, '$1')
    .replace(/^(.*?)\s+after JSON.*/, '$1');
  return `Error on line ${idx}: ${conciseMsg}`;
}

// Verbosely display a few error messages, list the count of remaining ones.
// The first three messages are shown one per line; if there are more, a final
// "<n> more error(s)" line summarizes the rest. An empty list yields ''.
function getFormattedErrorMsgList(errors: string[]): string {
  const shown = errors.slice(0, 3);
  const hiddenCount = errors.length - 3;
  if (hiddenCount <= 0) {
    return shown.join('\n');
  }
  const pluralSuffix = hiddenCount > 1 ? 's' : '';
  return [...shown, `${hiddenCount} more error${pluralSuffix}`].join('\n');
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ export function ConfigureExpressionModal(props: ConfigureExpressionModalProps) {
const docs = getIn(values, 'ingest.docs');
let docObjs = [] as {}[] | undefined;
try {
docObjs = JSON.parse(docs);
const lines = docs?.split('\n') as string[];
lines.forEach((line) => docObjs?.push(JSON.parse(line)));
} catch {}
const query = getIn(values, 'search.request');
let queryObj = {} as {} | undefined;
Expand Down Expand Up @@ -465,9 +466,13 @@ export function ConfigureExpressionModal(props: ConfigureExpressionModalProps) {
});
} else {
try {
const docObjs = JSON.parse(
values.ingest.docs
) as {}[];
const docObjs = [] as {}[];
const lines = values?.ingest?.docs?.split(
'\n'
) as string[];
lines.forEach((line) =>
docObjs?.push(JSON.parse(line))
);
if (docObjs.length > 0) {
setSourceInput(
customStringify(docObjs[0])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ export function ConfigureMultiExpressionModal(
const docs = getIn(values, 'ingest.docs');
let docObjs = [] as {}[] | undefined;
try {
docObjs = JSON.parse(docs);
const lines = docs?.split('\n') as string[];
lines.forEach((line) => docObjs?.push(JSON.parse(line)));
} catch {}
const query = getIn(values, 'search.request');
let queryObj = {} as {} | undefined;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ export function ConfigureTemplateModal(props: ConfigureTemplateModalProps) {
const docs = getIn(values, 'ingest.docs');
let docObjs = [] as {}[] | undefined;
try {
docObjs = JSON.parse(docs);
const lines = docs?.split('\n') as string[];
lines.forEach((line) => docObjs?.push(JSON.parse(line)));
} catch {}
const query = getIn(values, 'search.request');
let queryObj = {} as {} | undefined;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,9 @@ export function WorkflowInputs(props: WorkflowInputsProps) {
useEffect(() => {
let parsedDocsObjs = [] as {}[];
try {
parsedDocsObjs = JSON.parse(props.ingestDocs);
} catch (e) {}
const lines = props.ingestDocs?.split('\n') as string[];
lines.forEach((line) => parsedDocsObjs.push(JSON.parse(line)));
} catch {}
setDocsPopulated(parsedDocsObjs.length > 0 && !isEmpty(parsedDocsObjs[0]));
}, [props.ingestDocs]);

Expand Down Expand Up @@ -607,7 +608,8 @@ export function WorkflowInputs(props: WorkflowInputsProps) {
try {
let ingestDocsObjs = [] as {}[];
try {
ingestDocsObjs = JSON.parse(props.ingestDocs);
const lines = props.ingestDocs?.split('\n') as string[];
lines.forEach((line) => ingestDocsObjs.push(JSON.parse(line)));
} catch (e) {}
if (ingestDocsObjs.length > 0 && !isEmpty(ingestDocsObjs[0])) {
success = await validateAndUpdateWorkflow(false, true, false);
Expand Down
Loading
Loading