diff --git a/common/constants.ts b/common/constants.ts index 03870eda..3a04d900 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -266,6 +266,7 @@ export const ML_RESPONSE_PROCESSOR_EXAMPLE_DOCS_LINK = 'https://opensearch.org/docs/latest/search-plugins/search-pipelines/ml-inference-search-response/#example-externally-hosted-text-embedding-model'; export const UPDATE_MODEL_DOCS_LINK = 'https://opensearch.org/docs/latest/ml-commons-plugin/api/model-apis/update-model/'; +export const JSONLINES_LINK = 'https://jsonlines.org/'; // Large Language Models Documentation Links export const BEDROCK_CLAUDE_3_SONNET_DOCS_LINK = diff --git a/common/interfaces.ts b/common/interfaces.ts index 81f2b2dd..357373ca 100644 --- a/common/interfaces.ts +++ b/common/interfaces.ts @@ -30,6 +30,7 @@ export type ConfigFieldType = | 'json' | 'jsonArray' | 'jsonString' + | 'jsonLines' | 'select' | 'model' | 'map' diff --git a/common/utils.ts b/common/utils.ts index 54292854..8205e795 100644 --- a/common/utils.ts +++ b/common/utils.ts @@ -40,6 +40,10 @@ export function customStringify(jsonObj: {} | []): string { return JSON.stringify(jsonObj, undefined, 2); } +export function customStringifySingleLine(jsonObj: {}): string { + return JSON.stringify(jsonObj, undefined, 0); +} + export function isVectorSearchUseCase(workflow: Workflow | undefined): boolean { return ( workflow?.ui_metadata?.type !== undefined && diff --git a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx index 0f2576be..0269d74b 100644 --- a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx +++ b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx @@ -44,7 +44,8 @@ export function SourceData(props: SourceDataProps) { // empty/populated docs state let docs = []; try { - docs = JSON.parse(getIn(values, 'ingest.docs', [])); + const lines = getIn(values, 'ingest.docs', '').split('\n') as string[]; + lines.forEach((line) => docs.push(JSON.parse(line))); } catch {} const docsPopulated = docs.length > 0; diff --git a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx index cb443dfd..7254ad8b 100644 --- a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx @@ -21,15 +21,17 @@ import { EuiSmallButtonEmpty, EuiButtonGroup, EuiCompressedComboBox, + EuiLink, } from '@elastic/eui'; -import { JsonField } from '../input_fields'; +import { JsonLinesField } from '../input_fields'; import { - customStringify, + customStringifySingleLine, FETCH_ALL_QUERY_LARGE, IConfigField, IndexMappings, IngestDocsFormValues, isVectorSearchUseCase, + JSONLINES_LINK, MAX_BYTES_FORMATTED, MAX_DOCS_TO_IMPORT, SearchHit, @@ -72,11 +74,11 @@ export function SourceDataModal(props: SourceDataProps) { // sub-form values/schema const docsFormValues = { - docs: getInitialValue('jsonArray'), + docs: getInitialValue('jsonLines'), } as IngestDocsFormValues; const docsFormSchema = yup.object({ docs: getFieldSchema({ - type: 'jsonArray', + type: 'jsonLines', } as IConfigField), }) as yup.Schema; @@ -177,8 +179,14 @@ export function SourceDataModal(props: SourceDataProps) { .then((resp) => { const docObjs = resp?.hits?.hits ?.slice(0, MAX_DOCS_TO_IMPORT) - ?.map((hit: SearchHit) => hit?._source); - formikProps.setFieldValue('docs', customStringify(docObjs)); + ?.map((hit: SearchHit) => hit?._source) as {}[]; + let jsonLinesStr = ''; + try { + docObjs.forEach((docObj) => { + jsonLinesStr += customStringifySingleLine(docObj) + '\n'; + }); + } catch {} + formikProps.setFieldValue('docs', jsonLinesStr); }); } }, [selectedIndex]); @@ -234,7 +242,7 @@ export function SourceDataModal(props: SourceDataProps) { {props.selectedOption === SOURCE_OPTIONS.UPLOAD && ( <> { @@ -247,6 +255,7 @@ export function SourceDataModal(props: SourceDataProps) { 'docs', e.target.result as string ); + formikProps.setFieldTouched('docs'); } }; fileReader.readAsText(files[0]); @@ -286,12 +295,20 @@ export function SourceDataModal(props: SourceDataProps) { )} - + Documents must be in JSON lines format.{' '} + + Learn more + + + } editorHeight="40vh" readOnly={false} + validate={true} /> diff --git a/public/pages/workflow_detail/workflow_inputs/input_fields/index.ts b/public/pages/workflow_detail/workflow_inputs/input_fields/index.ts index 8de788b2..d0331f68 100644 --- a/public/pages/workflow_detail/workflow_inputs/input_fields/index.ts +++ b/public/pages/workflow_detail/workflow_inputs/input_fields/index.ts @@ -5,6 +5,7 @@ export { TextField } from './text_field'; export { JsonField } from './json_field'; +export { JsonLinesField } from './json_lines_field'; export { ModelField } from './model_field'; export { MapField } from './map_field'; export { MapArrayField } from './map_array_field'; diff --git a/public/pages/workflow_detail/workflow_inputs/input_fields/json_field.tsx b/public/pages/workflow_detail/workflow_inputs/input_fields/json_field.tsx index 969ea12b..1eee7680 100644 --- a/public/pages/workflow_detail/workflow_inputs/input_fields/json_field.tsx +++ b/public/pages/workflow_detail/workflow_inputs/input_fields/json_field.tsx @@ -29,7 +29,7 @@ interface JsonFieldProps { * in some custom JSON */ export function JsonField(props: JsonFieldProps) { - const validate = props.validate !== undefined ? props.validate : true; + const validate = props.validate ?? true; const { errors, touched, values } = useFormikContext(); diff --git a/public/pages/workflow_detail/workflow_inputs/input_fields/json_lines_field.tsx b/public/pages/workflow_detail/workflow_inputs/input_fields/json_lines_field.tsx new file mode 100644 index 00000000..3d5a9938 --- /dev/null +++ b/public/pages/workflow_detail/workflow_inputs/input_fields/json_lines_field.tsx @@ -0,0 +1,181 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import React, { ReactNode, useEffect, useState } from 'react'; +import { Field, FieldProps, getIn, useFormikContext } from 'formik'; +import { isEmpty } from 'lodash'; +import { + EuiCodeEditor, + EuiCompressedFormRow, + EuiLink, + EuiText, +} from '@elastic/eui'; +import { + customStringifySingleLine, + WorkflowFormValues, +} from '../../../../../common'; +import { camelCaseToTitleString } from '../../../../utils'; + +interface JsonLinesFieldProps { + fieldPath: string; // the full path in string-form to the field (e.g., 'ingest.enrich.processors.text_embedding_processor.inputField') + validate?: boolean; + label?: string; + helpLink?: string; + helpText?: string | ReactNode; + editorHeight?: string; + readOnly?: boolean; +} + +/** + * An input field for a component where users input data in JSON Lines format. + * https://jsonlines.org/ + */ +export function JsonLinesField(props: JsonLinesFieldProps) { + const validate = props.validate ?? true; + + const { errors, touched, values } = useFormikContext(); + + // temp input state. only format when users click out of the code editor + const [jsonStr, setJsonStr] = useState('{}'); + const [customErrMsg, setCustomErrMsg] = useState( + undefined + ); + + // initializing the text to be the stringified form value + useEffect(() => { + if (props.fieldPath && values) { + const formValue = getIn(values, props.fieldPath) as string; + if (formValue) { + setJsonStr(formValue); + } + } + }, [props.fieldPath, values]); + + return ( + + {({ field, form }: FieldProps) => { + return ( + + + Learn more + + + ) : undefined + } + helpText={props.helpText || undefined} + error={ + validate ? ( + <> + {customErrMsg?.split('\n')?.map((errMsg, idx) => { + return ( + + {errMsg} + + ); + })} + + ) : undefined + } + isInvalid={ + validate + ? getIn(errors, field.name) && getIn(touched, field.name) + : false + } + > + { + setJsonStr(input); + form.setFieldValue(field.name, input); + setCustomErrMsg(undefined); + }} + onBlur={() => { + form.setFieldTouched(field.name); + let finalJsonStr = ''; + let errs = [] as string[]; + try { + const lines = jsonStr?.split('\n'); + lines.forEach((line: string, idx) => { + if (line.trim() !== '') { + let parsedLine = {}; + try { + parsedLine = JSON.parse(line); + } catch (error) { + errs.push( + getFormattedErrorMsg(error as Error, idx + 1) + ); + } + if (!isEmpty(parsedLine)) { + finalJsonStr += + customStringifySingleLine(JSON.parse(line)) + '\n'; + } + } + }); + // remove trailing newline + if (finalJsonStr !== '') { + finalJsonStr = finalJsonStr.slice(0, -1); + } + + if (errs?.length > 0) { + setCustomErrMsg(getFormattedErrorMsgList(errs)); + } else { + form.setFieldValue(field.name, finalJsonStr); + setCustomErrMsg(undefined); + } + } catch (error) {} + }} + readOnly={props.readOnly || false} + setOptions={{ + fontSize: '14px', + useWorker: false, + highlightActiveLine: !props.readOnly, + highlightSelectedWord: !props.readOnly, + highlightGutterLine: !props.readOnly, + wrap: true, + }} + aria-label="Code Editor" + /> + + ); + }} + + ); +} + +// Parse out the useful information from an error triggered during JSON parsing failure +function getFormattedErrorMsg(error: Error, idx: number): string { + return `Error on line ${idx}: ${getIn(error, 'message', 'Invalid JSON') + .replace(/^(.*?)\s+in JSON.*/, '$1') + .replace(/^(.*?)\s+after JSON.*/, '$1')}`; +} + +// Verbosely display a few error messages, list the count of remaining ones. +function getFormattedErrorMsgList(errors: string[]): string { + let finalMsg = ''; + const verboseErrors = errors.slice(0, 3); + const nonVerboseErrorCount = errors.length - 3; + verboseErrors.forEach((error) => { + finalMsg += error + '\n'; + }); + if (nonVerboseErrorCount > 0) { + finalMsg += `${nonVerboseErrorCount} more error${ + nonVerboseErrorCount > 1 ? 's' : '' + }`; + } else if (finalMsg !== '') { + // remove trailing newline + finalMsg = finalMsg.slice(0, -1); + } + return finalMsg; +} diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_expression_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_expression_modal.tsx index 8620290b..45f0385d 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_expression_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_expression_modal.tsx @@ -123,7 +123,8 @@ export function ConfigureExpressionModal(props: ConfigureExpressionModalProps) { const docs = getIn(values, 'ingest.docs'); let docObjs = [] as {}[] | undefined; try { - docObjs = JSON.parse(docs); + const lines = docs?.split('\n') as string[]; + lines.forEach((line) => docObjs?.push(JSON.parse(line))); } catch {} const query = getIn(values, 'search.request'); let queryObj = {} as {} | undefined; @@ -465,9 +466,13 @@ export function ConfigureExpressionModal(props: ConfigureExpressionModalProps) { }); } else { try { - const docObjs = JSON.parse( - values.ingest.docs - ) as {}[]; + const docObjs = [] as {}[]; + const lines = values?.ingest?.docs?.split( + '\n' + ) as string[]; + lines.forEach((line) => + docObjs?.push(JSON.parse(line)) + ); if (docObjs.length > 0) { setSourceInput( customStringify(docObjs[0]) diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_multi_expression_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_multi_expression_modal.tsx index 5837fc66..3d3116fe 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_multi_expression_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_multi_expression_modal.tsx @@ -129,7 +129,8 @@ export function ConfigureMultiExpressionModal( const docs = getIn(values, 'ingest.docs'); let docObjs = [] as {}[] | undefined; try { - docObjs = JSON.parse(docs); + const lines = docs?.split('\n') as string[]; + lines.forEach((line) => docObjs?.push(JSON.parse(line))); } catch {} const query = getIn(values, 'search.request'); let queryObj = {} as {} | undefined; diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_template_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_template_modal.tsx index a243a262..f12c9eda 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_template_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs/modals/configure_template_modal.tsx @@ -149,7 +149,8 @@ export function ConfigureTemplateModal(props: ConfigureTemplateModalProps) { const docs = getIn(values, 'ingest.docs'); let docObjs = [] as {}[] | undefined; try { - docObjs = JSON.parse(docs); + const lines = docs?.split('\n') as string[]; + lines.forEach((line) => docObjs?.push(JSON.parse(line))); } catch {} const query = getIn(values, 'search.request'); let queryObj = {} as {} | undefined; diff --git a/public/pages/workflow_detail/workflow_inputs/workflow_inputs.tsx b/public/pages/workflow_detail/workflow_inputs/workflow_inputs.tsx index 0e19226e..9e63accd 100644 --- a/public/pages/workflow_detail/workflow_inputs/workflow_inputs.tsx +++ b/public/pages/workflow_detail/workflow_inputs/workflow_inputs.tsx @@ -275,8 +275,9 @@ export function WorkflowInputs(props: WorkflowInputsProps) { useEffect(() => { let parsedDocsObjs = [] as {}[]; try { - parsedDocsObjs = JSON.parse(props.ingestDocs); - } catch (e) {} + const lines = props.ingestDocs?.split('\n') as string[]; + lines.forEach((line) => parsedDocsObjs.push(JSON.parse(line))); + } catch {} setDocsPopulated(parsedDocsObjs.length > 0 && !isEmpty(parsedDocsObjs[0])); }, [props.ingestDocs]); @@ -607,7 +608,8 @@ export function WorkflowInputs(props: WorkflowInputsProps) { try { let ingestDocsObjs = [] as {}[]; try { - ingestDocsObjs = JSON.parse(props.ingestDocs); + const lines = props.ingestDocs?.split('\n') as string[]; + lines.forEach((line) => ingestDocsObjs.push(JSON.parse(line))); } catch (e) {} if (ingestDocsObjs.length > 0 && !isEmpty(ingestDocsObjs[0])) { success = await validateAndUpdateWorkflow(false, true, false); diff --git a/public/utils/config_to_form_utils.ts b/public/utils/config_to_form_utils.ts index 62484fc5..b1ed4d45 100644 --- a/public/utils/config_to_form_utils.ts +++ b/public/utils/config_to_form_utils.ts @@ -46,7 +46,7 @@ function ingestConfigToFormik( ingestFormikValues['pipelineName'] = ingestConfig.pipelineName.value || getInitialValue(ingestConfig.pipelineName.type); - ingestFormikValues['docs'] = ingestDocs || getInitialValue('jsonArray'); + ingestFormikValues['docs'] = ingestDocs || getInitialValue('jsonLines'); ingestFormikValues['enrich'] = processorsConfigToFormik( ingestConfig.enrich ); @@ -124,10 +124,9 @@ function searchIndexConfigToFormik( // Helper fn to get an initial value based on the field type export function getInitialValue(fieldType: ConfigFieldType): ConfigFieldValue { switch (fieldType) { - case 'string': { - return ''; - } - case 'select': { + case 'string': + case 'select': + case 'jsonLines': { return ''; } case 'model': { diff --git a/public/utils/config_to_schema_utils.ts b/public/utils/config_to_schema_utils.ts index 3465066f..0044fd19 100644 --- a/public/utils/config_to_schema_utils.ts +++ b/public/utils/config_to_schema_utils.ts @@ -47,7 +47,7 @@ function ingestConfigToSchema( const ingestSchemaObj = {} as { [key: string]: Schema }; if (ingestConfig?.enabled) { ingestSchemaObj['docs'] = getFieldSchema({ - type: 'jsonArray', + type: 'jsonLines', } as IConfigField); ingestSchemaObj['pipelineName'] = getFieldSchema(ingestConfig.pipelineName); ingestSchemaObj['enrich'] = processorsConfigToSchema(ingestConfig.enrich); @@ -221,6 +221,40 @@ export function getFieldSchema( ); break; } + case 'jsonLines': { + baseSchema = yup + .string() + .test('jsonLines', 'Invalid JSON lines format', (value) => { + let isValid = true; + try { + value?.split('\n').forEach((line: string) => { + if (line.trim() !== '') { + try { + JSON.parse(line); + } catch (e) { + isValid = false; + } + } + }); + } catch (error) { + isValid = false; + } + return isValid; + }) + .test( + 'jsonLines', + `The data size exceeds the limit of ${MAX_BYTES} bytes`, + (value) => { + try { + // @ts-ignore + return new TextEncoder().encode(value)?.length < MAX_BYTES; + } catch (error) { + return false; + } + } + ); + break; + } case 'jsonString': { baseSchema = yup .string() diff --git a/public/utils/form_to_pipeline_utils.ts b/public/utils/form_to_pipeline_utils.ts index 36ff5981..cd61c323 100644 --- a/public/utils/form_to_pipeline_utils.ts +++ b/public/utils/form_to_pipeline_utils.ts @@ -44,7 +44,8 @@ export function formikToPartialPipeline( return !isEmpty(precedingProcessors) ? ({ processors: processorConfigsToTemplateProcessors( - precedingProcessors + precedingProcessors, + context ), } as IngestPipelineConfig) : undefined; @@ -60,7 +61,8 @@ export function formikToPartialPipeline( return !isEmpty(precedingProcessors) ? ({ request_processors: processorConfigsToTemplateProcessors( - precedingProcessors + precedingProcessors, + context ), } as SearchPipelineConfig) : undefined; @@ -78,10 +80,12 @@ export function formikToPartialPipeline( return !isEmpty(precedingProcessors) || !isEmpty(requestProcessors) ? ({ request_processors: processorConfigsToTemplateProcessors( - requestProcessors + requestProcessors, + context ), response_processors: processorConfigsToTemplateProcessors( - precedingProcessors + precedingProcessors, + context ), } as SearchPipelineConfig) : undefined; diff --git a/public/utils/utils.tsx b/public/utils/utils.tsx index 6c23a6ce..26163c58 100644 --- a/public/utils/utils.tsx +++ b/public/utils/utils.tsx @@ -175,9 +175,10 @@ export function prepareDocsForSimulate( const preparedDocs = [] as SimulateIngestPipelineDoc[]; let docObjs = [] as {}[]; try { - docObjs = JSON.parse(docs) as {}[]; + const lines = docs?.split('\n') as string[]; + lines.forEach((line) => docObjs.push(JSON.parse(line))); } catch {} - docObjs.forEach((doc) => { + docObjs?.forEach((doc) => { preparedDocs.push({ _index: indexName, _id: generateId(),