diff --git a/.changeset/fifty-pigs-sit.md b/.changeset/fifty-pigs-sit.md new file mode 100644 index 0000000000000..12fef9e5d405f --- /dev/null +++ b/.changeset/fifty-pigs-sit.md @@ -0,0 +1,8 @@ +--- +"@medusajs/workflow-engine-inmemory": patch +"@medusajs/workflow-engine-redis": patch +"@medusajs/orchestration": patch +"@medusajs/workflows-sdk": patch +--- + +fix: workflow retry interval race condition diff --git a/integration-tests/modules/__tests__/workflow-engine/tests.ts b/integration-tests/modules/__tests__/workflow-engine/tests.ts index 8855260d15131..12e3c872756ae 100644 --- a/integration-tests/modules/__tests__/workflow-engine/tests.ts +++ b/integration-tests/modules/__tests__/workflow-engine/tests.ts @@ -185,7 +185,7 @@ export const workflowEngineTestSuite = ( hasAsyncSteps: true, hasFailedSteps: false, hasSkippedSteps: false, - hasWaitingSteps: false, + hasWaitingSteps: true, hasRevertedSteps: false, }), context: expect.objectContaining({ @@ -236,6 +236,13 @@ export const workflowEngineTestSuite = ( workflow_id: "my-workflow-name", transaction_id: "trx_123", state: "done", + execution: expect.objectContaining({ + hasAsyncSteps: true, + hasFailedSteps: false, + hasSkippedSteps: false, + hasWaitingSteps: false, + hasRevertedSteps: false, + }), context: expect.objectContaining({ data: expect.objectContaining({ invoke: expect.objectContaining({ diff --git a/packages/core/orchestration/src/transaction/distributed-transaction.ts b/packages/core/orchestration/src/transaction/distributed-transaction.ts index afa5d280eb2f7..7859d1284dd01 100644 --- a/packages/core/orchestration/src/transaction/distributed-transaction.ts +++ b/packages/core/orchestration/src/transaction/distributed-transaction.ts @@ -2,6 +2,7 @@ import { isDefined } from "@medusajs/utils" import { EventEmitter } from "events" import { IDistributedTransactionStorage } from "./datastore/abstract-storage" import { BaseInMemoryDistributedTransactionStorage } from "./datastore/base-in-memory-storage" +import { NonSerializableCheckPointError } from "./errors" import { TransactionOrchestrator } from "./transaction-orchestrator" import { TransactionStep, TransactionStepHandler } from "./transaction-step" import { @@ -9,7 +10,6 @@ import { TransactionHandlerType, TransactionState, } from "./types" -import { NonSerializableCheckPointError } from "./errors" /** * @typedef TransactionMetadata @@ -248,7 +248,6 @@ class DistributedTransaction extends EventEmitter { return } - await this.saveCheckpoint() await DistributedTransaction.keyValueStore.scheduleRetry( this, step, @@ -267,7 +266,6 @@ class DistributedTransaction extends EventEmitter { return } - await this.saveCheckpoint() await DistributedTransaction.keyValueStore.scheduleTransactionTimeout( this, Date.now(), diff --git a/packages/core/orchestration/src/transaction/errors.ts b/packages/core/orchestration/src/transaction/errors.ts index 8a3ff7bb7f265..081f067db5eae 100644 --- a/packages/core/orchestration/src/transaction/errors.ts +++ b/packages/core/orchestration/src/transaction/errors.ts @@ -84,3 +84,12 @@ export class NonSerializableCheckPointError extends Error { this.name = "NonSerializableCheckPointError" } } + +export class SkipExecutionError extends Error { + static isSkipExecutionError(error: Error): error is SkipExecutionError { + return ( + error instanceof SkipExecutionError || + error?.name === "SkipExecutionError" + ) + } +} diff --git a/packages/core/orchestration/src/transaction/transaction-orchestrator.ts b/packages/core/orchestration/src/transaction/transaction-orchestrator.ts index 7bb4ebd241cfb..25c2fe4b36131 100644 --- a/packages/core/orchestration/src/transaction/transaction-orchestrator.ts +++ b/packages/core/orchestration/src/transaction/transaction-orchestrator.ts @@ -28,6 +28,7 @@ import { import { EventEmitter } from "events" import { PermanentStepFailureError, + SkipExecutionError, SkipStepResponse, TransactionStepTimeoutError, TransactionTimeoutError, @@ -239,6 +240,7 @@ export class TransactionOrchestrator extends EventEmitter { ) { const flow = transaction.getFlow() let hasTimedOut = false + if (!flow.timedOutAt && this.hasExpired({ transaction }, Date.now())) { flow.timedOutAt = Date.now() @@ -252,8 +254,6 @@ export class TransactionOrchestrator extends EventEmitter { ) } - await transaction.saveCheckpoint() - this.emit(DistributedTransactionEvent.TIMEOUT, { transaction }) hasTimedOut = true @@ -281,8 +281,6 @@ export class TransactionOrchestrator extends EventEmitter { ) hasTimedOut = true - await transaction.saveCheckpoint() - this.emit(DistributedTransactionEvent.TIMEOUT, { transaction }) } return hasTimedOut @@ -457,7 +455,9 @@ export class TransactionOrchestrator extends EventEmitter { transaction: DistributedTransactionType, step: TransactionStep, response: unknown - ): Promise { + ): Promise<{ + stopExecution: boolean + }> { const hasStepTimedOut = step.getStates().state === TransactionStepState.TIMEOUT @@ -471,9 +471,6 @@ export class TransactionOrchestrator extends EventEmitter { ) } - const flow = transaction.getFlow() - const options = TransactionOrchestrator.getWorkflowOptions(flow.modelId) - if (!hasStepTimedOut) { step.changeStatus(TransactionStepStatus.OK) } @@ -484,8 +481,11 @@ export class TransactionOrchestrator extends EventEmitter { step.changeState(TransactionStepState.DONE) } - if (step.definition.async || options?.storeExecution) { + let shouldEmit = true + try { await transaction.saveCheckpoint() + } catch (error) { + shouldEmit = false } const cleaningUp: Promise[] = [] @@ -498,29 +498,41 @@ export class TransactionOrchestrator extends EventEmitter { await promiseAll(cleaningUp) - const eventName = step.isCompensating() - ? DistributedTransactionEvent.COMPENSATE_STEP_SUCCESS - : DistributedTransactionEvent.STEP_SUCCESS - transaction.emit(eventName, { step, transaction }) + if (shouldEmit) { + const eventName = step.isCompensating() + ? DistributedTransactionEvent.COMPENSATE_STEP_SUCCESS + : DistributedTransactionEvent.STEP_SUCCESS + transaction.emit(eventName, { step, transaction }) + } + + return { + stopExecution: !shouldEmit, + } } private static async skipStep( transaction: DistributedTransactionType, step: TransactionStep - ): Promise { + ): Promise<{ + stopExecution: boolean + }> { const hasStepTimedOut = step.getStates().state === TransactionStepState.TIMEOUT - const flow = transaction.getFlow() - const options = TransactionOrchestrator.getWorkflowOptions(flow.modelId) - if (!hasStepTimedOut) { step.changeStatus(TransactionStepStatus.OK) step.changeState(TransactionStepState.SKIPPED) } - if (step.definition.async || options?.storeExecution) { + let shouldEmit = true + try { await transaction.saveCheckpoint() + } catch (error) { + if (SkipExecutionError.isSkipExecutionError(error)) { + shouldEmit = false + } else { + throw error + } } const cleaningUp: Promise[] = [] @@ -533,8 +545,14 @@ export class TransactionOrchestrator extends EventEmitter { await promiseAll(cleaningUp) - const eventName = DistributedTransactionEvent.STEP_SKIPPED - transaction.emit(eventName, { step, transaction }) + if (shouldEmit) { + const eventName = DistributedTransactionEvent.STEP_SKIPPED + transaction.emit(eventName, { step, transaction }) + } + + return { + stopExecution: !shouldEmit, + } } private static async setStepTimeout( @@ -589,7 +607,15 @@ export class TransactionOrchestrator extends EventEmitter { maxRetries: number = TransactionOrchestrator.DEFAULT_RETRIES, isTimeout = false, timeoutError?: TransactionStepTimeoutError | TransactionTimeoutError - ): Promise { + ): Promise<{ + stopExecution: boolean + }> { + if (SkipExecutionError.isSkipExecutionError(error)) { + return { + stopExecution: false, + } + } + step.failures++ if (isErrorLike(error)) { @@ -604,7 +630,6 @@ export class TransactionOrchestrator extends EventEmitter { } const flow = transaction.getFlow() - const options = TransactionOrchestrator.getWorkflowOptions(flow.modelId) const cleaningUp: Promise[] = [] @@ -653,8 +678,15 @@ export class TransactionOrchestrator extends EventEmitter { } } - if (step.definition.async || options?.storeExecution) { + let shouldEmit = true + try { await transaction.saveCheckpoint() + } catch (error) { + if (SkipExecutionError.isSkipExecutionError(error)) { + shouldEmit = false + } else { + throw error + } } if (step.hasRetryScheduled()) { @@ -663,10 +695,16 @@ export class TransactionOrchestrator extends EventEmitter { await promiseAll(cleaningUp) - const eventName = step.isCompensating() - ? DistributedTransactionEvent.COMPENSATE_STEP_FAILURE - : DistributedTransactionEvent.STEP_FAILURE - transaction.emit(eventName, { step, transaction }) + if (shouldEmit) { + const eventName = step.isCompensating() + ? DistributedTransactionEvent.COMPENSATE_STEP_FAILURE + : DistributedTransactionEvent.STEP_FAILURE + transaction.emit(eventName, { step, transaction }) + } + + return { + stopExecution: !shouldEmit, + } } private async executeNext( @@ -680,7 +718,6 @@ export class TransactionOrchestrator extends EventEmitter { } const flow = transaction.getFlow() - const options = TransactionOrchestrator.getWorkflowOptions(flow.modelId) const nextSteps = await this.checkAllSteps(transaction) const execution: Promise[] = [] @@ -699,11 +736,9 @@ export class TransactionOrchestrator extends EventEmitter { } await transaction.saveCheckpoint() - this.emit(DistributedTransactionEvent.FINISH, { transaction }) } - let hasSyncSteps = false for (const step of nextSteps.next) { const curState = step.getStates() const type = step.isCompensating() @@ -783,19 +818,18 @@ export class TransactionOrchestrator extends EventEmitter { ) } - await TransactionOrchestrator.setStepFailure( + const ret = await TransactionOrchestrator.setStepFailure( transaction, step, error, endRetry ? 0 : step.definition.maxRetries ) - if (isAsync) { - await transaction.scheduleRetry( - step, - step.definition.retryInterval ?? 0 - ) + if (isAsync && !ret.stopExecution) { + await transaction.scheduleRetry(step, 0) } + + return ret } const traceData = { @@ -821,8 +855,6 @@ export class TransactionOrchestrator extends EventEmitter { ] as Parameters if (!isAsync) { - hasSyncSteps = true - const stepHandler = async () => { return await transaction.handler(...handlerArgs) } @@ -875,10 +907,13 @@ export class TransactionOrchestrator extends EventEmitter { endRetry: true, response, }) + return } - await setStepFailure(error, { response }) + await setStepFailure(error, { + response, + }) }) ) } else { @@ -933,10 +968,7 @@ export class TransactionOrchestrator extends EventEmitter { } // check nested flow - await transaction.scheduleRetry( - step, - step.definition.retryInterval ?? 0 - ) + await transaction.scheduleRetry(step, 0) }) .catch(async (error) => { const response = error?.getStepResponse?.() @@ -948,18 +980,27 @@ export class TransactionOrchestrator extends EventEmitter { endRetry: true, response, }) + return } - await setStepFailure(error, { response }) + await setStepFailure(error, { + response, + }) }) }) ) } } - if (hasSyncSteps && options?.storeExecution) { + try { await transaction.saveCheckpoint() + } catch (error) { + if (SkipExecutionError.isSkipExecutionError(error)) { + break + } else { + throw error + } } await promiseAll(execution) @@ -993,11 +1034,9 @@ export class TransactionOrchestrator extends EventEmitter { flow.state = TransactionState.INVOKING flow.startedAt = Date.now() - if (this.getOptions().store) { - await transaction.saveCheckpoint( - flow.hasAsyncSteps ? 0 : TransactionOrchestrator.DEFAULT_TTL - ) - } + await transaction.saveCheckpoint( + flow.hasAsyncSteps ? 0 : TransactionOrchestrator.DEFAULT_TTL + ) if (transaction.hasTimeout()) { await transaction.scheduleTransactionTimeout( diff --git a/packages/core/workflows-sdk/src/utils/composer/create-step.ts b/packages/core/workflows-sdk/src/utils/composer/create-step.ts index bdf58d6c5f463..9fa20f23641ba 100644 --- a/packages/core/workflows-sdk/src/utils/composer/create-step.ts +++ b/packages/core/workflows-sdk/src/utils/composer/create-step.ts @@ -192,8 +192,6 @@ export function applyStep< ret.__step__ = newStepName WorkflowManager.update(this.workflowId, this.flow, this.handlers) - //const confRef = proxify(ret) - if (global[OrchestrationUtils.SymbolMedusaWorkflowComposerCondition]) { const flagSteps = global[OrchestrationUtils.SymbolMedusaWorkflowComposerCondition].steps @@ -334,11 +332,11 @@ function wrapConditionalStep( * createStep, * StepResponse * } from "@medusajs/framework/workflows-sdk" - * + * * interface CreateProductInput { * title: string * } - * + * * export const createProductStep = createStep( * "createProductStep", * async function ( diff --git a/packages/core/workflows-sdk/src/utils/composer/create-workflow.ts b/packages/core/workflows-sdk/src/utils/composer/create-workflow.ts index 58f5134ed3b24..18d9eecf8d748 100644 --- a/packages/core/workflows-sdk/src/utils/composer/create-workflow.ts +++ b/packages/core/workflows-sdk/src/utils/composer/create-workflow.ts @@ -47,22 +47,22 @@ global[OrchestrationUtils.SymbolMedusaWorkflowComposerContext] = null * createProductStep, * getProductStep, * } from "./steps" - * + * * interface WorkflowInput { * title: string * } - * + * * const myWorkflow = createWorkflow( * "my-workflow", * (input: WorkflowInput) => { * // Everything here will be executed and resolved later * // during the execution. Including the data access. - * + * * const product = createProductStep(input) * return new WorkflowResponse(getProductStep(product.id)) * } * ) - * + * * export async function GET( * req: MedusaRequest, * res: MedusaResponse @@ -73,7 +73,7 @@ global[OrchestrationUtils.SymbolMedusaWorkflowComposerContext] = null * title: "Shirt" * } * }) - * + * * res.json({ * product * }) diff --git a/packages/modules/workflow-engine-inmemory/integration-tests/__tests__/index.spec.ts b/packages/modules/workflow-engine-inmemory/integration-tests/__tests__/index.spec.ts index e52ed640d76d1..bee8c94d41108 100644 --- a/packages/modules/workflow-engine-inmemory/integration-tests/__tests__/index.spec.ts +++ b/packages/modules/workflow-engine-inmemory/integration-tests/__tests__/index.spec.ts @@ -30,7 +30,7 @@ import { } from "../__fixtures__/workflow_event_group_id" import { createScheduled } from "../__fixtures__/workflow_scheduled" -jest.setTimeout(100000) +jest.setTimeout(3000000) moduleIntegrationTestRunner({ moduleName: Modules.WORKFLOW_ENGINE, diff --git a/packages/modules/workflow-engine-inmemory/integration-tests/__tests__/race.spec.ts b/packages/modules/workflow-engine-inmemory/integration-tests/__tests__/race.spec.ts new file mode 100644 index 0000000000000..8da509ad8a62c --- /dev/null +++ b/packages/modules/workflow-engine-inmemory/integration-tests/__tests__/race.spec.ts @@ -0,0 +1,182 @@ +import { IWorkflowEngineService } from "@medusajs/framework/types" +import { Modules } from "@medusajs/framework/utils" +import { + createStep, + createWorkflow, + StepResponse, + transform, + WorkflowResponse, +} from "@medusajs/framework/workflows-sdk" +import { moduleIntegrationTestRunner } from "@medusajs/test-utils" +import { setTimeout as setTimeoutSync } from "timers" +import { setTimeout } from "timers/promises" +import "../__fixtures__" + +jest.setTimeout(3000000) + +moduleIntegrationTestRunner({ + moduleName: Modules.WORKFLOW_ENGINE, + resolve: __dirname + "/../..", + testSuite: ({ service: workflowOrcModule, medusaApp }) => { + describe("Testing race condition of the workflow during retry", () => { + it("should prevent race continuation of the workflow during retryIntervalAwaiting in background execution", (done) => { + const step0InvokeMock = jest.fn() + const step1InvokeMock = jest.fn() + const step2InvokeMock = jest.fn() + const transformMock = jest.fn() + + const step0 = createStep("step0", async (_) => { + step0InvokeMock() + return new StepResponse("result from step 0") + }) + + const step1 = createStep("step1", async (_) => { + step1InvokeMock() + await setTimeout(2000) + return new StepResponse({ isSuccess: true }) + }) + + const step2 = createStep("step2", async (input: any) => { + step2InvokeMock() + return new StepResponse({ result: input }) + }) + + const subWorkflow = createWorkflow("sub-workflow-1", function () { + const status = step1() + return new WorkflowResponse(status) + }) + + createWorkflow("workflow-1", function () { + const build = step0() + + const status = subWorkflow.runAsStep({} as any).config({ + async: true, + compensateAsync: true, + backgroundExecution: true, + retryIntervalAwaiting: 1, + }) + + const transformedResult = transform({ status }, (data) => { + transformMock() + return { + status: data.status, + } + }) + + step2(transformedResult) + return new WorkflowResponse(build) + }) + + void workflowOrcModule.subscribe({ + workflowId: "workflow-1", + + subscriber: (event) => { + if (event.eventType === "onFinish") { + expect(step0InvokeMock).toHaveBeenCalledTimes(1) + expect(step1InvokeMock.mock.calls.length).toBeGreaterThan(1) + expect(step2InvokeMock).toHaveBeenCalledTimes(1) + expect(transformMock).toHaveBeenCalledTimes(1) + setTimeoutSync(done, 500) + } + }, + }) + + workflowOrcModule + .run("workflow-1", { throwOnError: false }) + .then(({ result }) => { + expect(result).toBe("result from step 0") + }) + .catch((e) => e) + }) + + it("should prevent race continuation of the workflow compensation during retryIntervalAwaiting in background execution", (done) => { + const workflowId = "RACE_workflow-1" + + const step0InvokeMock = jest.fn() + const step0CompensateMock = jest.fn() + const step1InvokeMock = jest.fn() + const step1CompensateMock = jest.fn() + const step2InvokeMock = jest.fn() + const transformMock = jest.fn() + + const step0 = createStep( + "RACE_step0", + async (_) => { + step0InvokeMock() + return new StepResponse("result from step 0") + }, + () => { + step0CompensateMock() + } + ) + + const step1 = createStep( + "RACE_step1", + async (_) => { + step1InvokeMock() + await setTimeout(300) + throw new Error("error from step 1") + }, + () => { + step1CompensateMock() + } + ) + + const step2 = createStep("RACE_step2", async (input: any) => { + step2InvokeMock() + return new StepResponse({ result: input }) + }) + + const subWorkflow = createWorkflow("RACE_sub-workflow-1", function () { + const status = step1() + return new WorkflowResponse(status) + }) + + createWorkflow(workflowId, function () { + const build = step0() + + const status = subWorkflow.runAsStep({} as any).config({ + async: true, + compensateAsync: true, + backgroundExecution: true, + retryIntervalAwaiting: 0.1, + }) + + const transformedResult = transform({ status }, (data) => { + transformMock() + return { + status: data.status, + } + }) + + step2(transformedResult) + return new WorkflowResponse(build) + }) + + void workflowOrcModule.subscribe({ + workflowId: workflowId, + subscriber: async (event) => { + if (event.eventType === "onFinish") { + expect(step0InvokeMock).toHaveBeenCalledTimes(1) + expect(step0CompensateMock).toHaveBeenCalledTimes(1) + expect(step1InvokeMock.mock.calls.length).toBeGreaterThan(2) + expect(step1CompensateMock).toHaveBeenCalledTimes(1) + expect(step2InvokeMock).toHaveBeenCalledTimes(0) + expect(transformMock).toHaveBeenCalledTimes(0) + setTimeoutSync(done, 500) + } + }, + }) + + workflowOrcModule + .run(workflowId, { + throwOnError: false, + }) + .then(({ result }) => { + expect(result).toBe("result from step 0") + }) + .catch((e) => e) + }) + }) + }, +}) diff --git a/packages/modules/workflow-engine-inmemory/package.json b/packages/modules/workflow-engine-inmemory/package.json index 3a4039af29f1b..ecaf2fb50bbaf 100644 --- a/packages/modules/workflow-engine-inmemory/package.json +++ b/packages/modules/workflow-engine-inmemory/package.json @@ -29,7 +29,7 @@ "resolve:aliases": "tsc --showConfig -p tsconfig.json > tsconfig.resolved.json && tsc-alias -p tsconfig.resolved.json && rimraf tsconfig.resolved.json", "build": "rimraf dist && tsc --build && npm run resolve:aliases", "test": "jest --passWithNoTests --runInBand --bail --forceExit -- src", - "test:integration": "jest --silent --forceExit -- integration-tests/**/__tests__/**/*.ts", + "test:integration": "jest --forceExit -- integration-tests/**/__tests__/**/*.ts", "migration:initial": " MIKRO_ORM_CLI_CONFIG=./mikro-orm.config.dev.ts medusa-mikro-orm migration:create --initial", "migration:create": " MIKRO_ORM_CLI_CONFIG=./mikro-orm.config.dev.ts medusa-mikro-orm migration:create", "migration:up": " MIKRO_ORM_CLI_CONFIG=./mikro-orm.config.dev.ts medusa-mikro-orm migration:up", diff --git a/packages/modules/workflow-engine-inmemory/src/services/workflows-module.ts b/packages/modules/workflow-engine-inmemory/src/services/workflows-module.ts index 5d89c45767f1b..c4c910068caaa 100644 --- a/packages/modules/workflow-engine-inmemory/src/services/workflows-module.ts +++ b/packages/modules/workflow-engine-inmemory/src/services/workflows-module.ts @@ -62,7 +62,9 @@ export class WorkflowsModuleService< await this.clearExpiredExecutions() this.clearTimeout_ = setInterval(async () => { - await this.clearExpiredExecutions() + try { + await this.clearExpiredExecutions() + } catch {} }, 1000 * 60 * 60) }, onApplicationShutdown: async () => { diff --git a/packages/modules/workflow-engine-inmemory/src/utils/workflow-orchestrator-storage.ts b/packages/modules/workflow-engine-inmemory/src/utils/workflow-orchestrator-storage.ts index 0a3248084e1b7..91462c84227f3 100644 --- a/packages/modules/workflow-engine-inmemory/src/utils/workflow-orchestrator-storage.ts +++ b/packages/modules/workflow-engine-inmemory/src/utils/workflow-orchestrator-storage.ts @@ -3,12 +3,18 @@ import { IDistributedSchedulerStorage, IDistributedTransactionStorage, SchedulerOptions, + SkipExecutionError, TransactionCheckpoint, + TransactionFlow, TransactionOptions, TransactionStep, } from "@medusajs/framework/orchestration" import { Logger, ModulesSdkTypes } from "@medusajs/framework/types" -import { MedusaError, TransactionState } from "@medusajs/framework/utils" +import { + MedusaError, + TransactionState, + TransactionStepState, +} from "@medusajs/framework/utils" import { WorkflowOrchestratorService } from "@services" import { CronExpression, parseExpression } from "cron-parser" @@ -121,8 +127,6 @@ export class InMemoryDistributedTransactionStorage ttl?: number, options?: TransactionOptions ): Promise { - this.storage.set(key, data) - /** * Store the retention time only if the transaction is done, failed or reverted. * From that moment, this tuple can be later on archived or deleted after the retention time. @@ -135,11 +139,15 @@ export class InMemoryDistributedTransactionStorage const { retentionTime, idempotent } = options ?? {} - if (hasFinished) { - Object.assign(data, { - retention_time: retentionTime, - }) - } + await this.#preventRaceConditionExecutionIfNecessary({ + data, + key, + }) + + Object.assign(data, { + retention_time: retentionTime, + }) + this.storage.set(key, data) if (hasFinished && !retentionTime && !idempotent) { await this.deleteFromDb(data) @@ -148,7 +156,103 @@ export class InMemoryDistributedTransactionStorage } if (hasFinished) { - this.storage.delete(key) + const tenMinutes = 10 * 60 * 1000 + setTimeout(() => { + this.storage.delete(key) + }, tenMinutes) + } + } + + async #preventRaceConditionExecutionIfNecessary({ + data, + key, + }: { + data: TransactionCheckpoint + key: string + }) { + /** + * In case many execution can succeed simultaneously, we need to ensure that the latest + * execution does continue if a previous execution is considered finished + */ + const currentFlow = data.flow + const { flow: latestUpdatedFlow } = + (await this.get(key)) ?? ({ flow: {} } as { flow: TransactionFlow }) + + const currentFlowLastInvokingStepIndex = Object.values( + currentFlow.steps + ).findIndex((step) => { + return [ + TransactionStepState.INVOKING, + TransactionStepState.NOT_STARTED, + ].includes(step.invoke?.state) + }) + + const latestUpdatedFlowLastInvokingStepIndex = !latestUpdatedFlow.steps + ? 1 // There is no other execution, so the current execution is the latest + : Object.values( + (latestUpdatedFlow.steps as Record) ?? {} + ).findIndex((step) => { + return [ + TransactionStepState.INVOKING, + TransactionStepState.NOT_STARTED, + ].includes(step.invoke?.state) + }) + + const currentFlowLastCompensatingStepIndex = Object.values( + currentFlow.steps + ) + .reverse() + .findIndex((step) => { + return [ + TransactionStepState.COMPENSATING, + TransactionStepState.NOT_STARTED, + ].includes(step.compensate?.state) + }) + + const latestUpdatedFlowLastCompensatingStepIndex = !latestUpdatedFlow.steps + ? -1 // There is no other execution, so the current execution is the latest + : Object.values( + (latestUpdatedFlow.steps as Record) ?? {} + ) + .reverse() + .findIndex((step) => { + return [ + TransactionStepState.COMPENSATING, + TransactionStepState.NOT_STARTED, + ].includes(step.compensate?.state) + }) + + const isLatestExecutionFinishedIndex = -1 + const invokeShouldBeSkipped = + (latestUpdatedFlowLastInvokingStepIndex === + isLatestExecutionFinishedIndex || + currentFlowLastInvokingStepIndex < + latestUpdatedFlowLastInvokingStepIndex) && + currentFlowLastInvokingStepIndex !== isLatestExecutionFinishedIndex + + const compensateShouldBeSkipped = + currentFlowLastCompensatingStepIndex < + latestUpdatedFlowLastCompensatingStepIndex && + currentFlowLastCompensatingStepIndex !== isLatestExecutionFinishedIndex && + latestUpdatedFlowLastCompensatingStepIndex !== + isLatestExecutionFinishedIndex + + if ( + (data.flow.state !== TransactionState.COMPENSATING && + invokeShouldBeSkipped) || + (data.flow.state === TransactionState.COMPENSATING && + compensateShouldBeSkipped) || + (latestUpdatedFlow.state === TransactionState.COMPENSATING && + ![TransactionState.REVERTED, TransactionState.FAILED].includes( + currentFlow.state + ) && + currentFlow.state !== latestUpdatedFlow.state) || + (latestUpdatedFlow.state === TransactionState.REVERTED && + currentFlow.state !== TransactionState.REVERTED) || + (latestUpdatedFlow.state === TransactionState.FAILED && + currentFlow.state !== TransactionState.FAILED) + ) { + throw new SkipExecutionError("Already finished by another execution") } } diff --git a/packages/modules/workflow-engine-redis/integration-tests/__tests__/race.spec.ts b/packages/modules/workflow-engine-redis/integration-tests/__tests__/race.spec.ts new file mode 100644 index 0000000000000..1304b6f8fc794 --- /dev/null +++ b/packages/modules/workflow-engine-redis/integration-tests/__tests__/race.spec.ts @@ -0,0 +1,203 @@ +import { IWorkflowEngineService } from "@medusajs/framework/types" +import { Modules } from "@medusajs/framework/utils" +import { + createStep, + createWorkflow, + StepResponse, + transform, + WorkflowResponse, +} from "@medusajs/framework/workflows-sdk" +import { moduleIntegrationTestRunner } from "@medusajs/test-utils" +import { setTimeout as setTimeoutSync } from "timers" +import { setTimeout } from "timers/promises" +import "../__fixtures__" + +jest.setTimeout(999900000) + +const failTrap = (done) => { + setTimeoutSync(() => { + // REF:https://stackoverflow.com/questions/78028715/jest-async-test-with-event-emitter-isnt-ending + console.warn( + "Jest is breaking the event emit with its debouncer. This allows to continue the test by managing the timeout of the test manually." + ) + done() + }, 5000) +} + +// REF:https://stackoverflow.com/questions/78028715/jest-async-test-with-event-emitter-isnt-ending + +moduleIntegrationTestRunner({ + moduleName: Modules.WORKFLOW_ENGINE, + resolve: __dirname + "/../..", + moduleOptions: { + redis: { + url: "localhost:6379", + }, + }, + testSuite: ({ service: workflowOrcModule, medusaApp }) => { + describe("Testing race condition of the workflow during retry", () => { + it("should prevent race continuation of the workflow during retryIntervalAwaiting in background execution", (done) => { + const transactionId = "transaction_id" + + const step0InvokeMock = jest.fn() + const step1InvokeMock = jest.fn() + const step2InvokeMock = jest.fn() + const transformMock = jest.fn() + + const step0 = createStep("step0", async (_) => { + step0InvokeMock() + return new StepResponse("result from step 0") + }) + + const step1 = createStep("step1", async (_) => { + step1InvokeMock() + await setTimeout(2000) + return new StepResponse({ isSuccess: true }) + }) + + const step2 = createStep("step2", async (input: any) => { + step2InvokeMock() + return new StepResponse({ result: input }) + }) + + const subWorkflow = createWorkflow("sub-workflow-1", function () { + const status = step1() + return new WorkflowResponse(status) + }) + + createWorkflow("workflow-1", function () { + const build = step0() + + const status = subWorkflow.runAsStep({} as any).config({ + async: true, + compensateAsync: true, + backgroundExecution: true, + retryIntervalAwaiting: 1, + }) + + const transformedResult = transform({ status }, (data) => { + transformMock() + return { + status: data.status, + } + }) + + step2(transformedResult) + return new WorkflowResponse(build) + }) + + void workflowOrcModule.subscribe({ + workflowId: "workflow-1", + transactionId, + subscriber: (event) => { + if (event.eventType === "onFinish") { + expect(step0InvokeMock).toHaveBeenCalledTimes(1) + expect(step1InvokeMock.mock.calls.length).toBeGreaterThan(1) + expect(step2InvokeMock).toHaveBeenCalledTimes(1) + expect(transformMock).toHaveBeenCalledTimes(1) + setTimeoutSync(done, 500) + } + }, + }) + + workflowOrcModule + .run("workflow-1", { transactionId }) + .then(({ result }) => { + expect(result).toBe("result from step 0") + }) + + failTrap(done) + }) + + it("should prevent race continuation of the workflow compensation during retryIntervalAwaiting in background execution", (done) => { + const transactionId = "transaction_id" + const workflowId = "RACE_workflow-1" + + const step0InvokeMock = jest.fn() + const step0CompensateMock = jest.fn() + const step1InvokeMock = jest.fn() + const step1CompensateMock = jest.fn() + const step2InvokeMock = jest.fn() + const transformMock = jest.fn() + + const step0 = createStep( + "RACE_step0", + async (_) => { + step0InvokeMock() + return new StepResponse("result from step 0") + }, + () => { + step0CompensateMock() + } + ) + + const step1 = createStep( + "RACE_step1", + async (_) => { + step1InvokeMock() + await setTimeout(500) + throw new Error("error from step 1") + }, + () => { + step1CompensateMock() + } + ) + + const step2 = createStep("RACE_step2", async (input: any) => { + step2InvokeMock() + return new StepResponse({ result: input }) + }) + + const subWorkflow = createWorkflow("RACE_sub-workflow-1", function () { + const status = step1() + return new WorkflowResponse(status) + }) + + createWorkflow(workflowId, function () { + const build = step0() + + const status = subWorkflow.runAsStep({} as any).config({ + async: true, + compensateAsync: true, + backgroundExecution: true, + retryIntervalAwaiting: 0.1, + }) + + const transformedResult = transform({ status }, (data) => { + transformMock() + return { + status: data.status, + } + }) + + step2(transformedResult) + return new WorkflowResponse(build) + }) + + void workflowOrcModule.subscribe({ + workflowId: workflowId, + transactionId, + subscriber: (event) => { + if (event.eventType === "onFinish") { + expect(step0InvokeMock).toHaveBeenCalledTimes(1) + expect(step0CompensateMock).toHaveBeenCalledTimes(1) + expect(step1InvokeMock.mock.calls.length).toBeGreaterThan(2) + expect(step1CompensateMock).toHaveBeenCalledTimes(1) + expect(step2InvokeMock).toHaveBeenCalledTimes(0) + expect(transformMock).toHaveBeenCalledTimes(0) + done() + } + }, + }) + + workflowOrcModule + .run(workflowId, { transactionId }) + .then(({ result }) => { + expect(result).toBe("result from step 0") + }) + + failTrap(done) + }) + }) + }, +}) diff --git a/packages/modules/workflow-engine-redis/src/services/workflow-orchestrator.ts b/packages/modules/workflow-engine-redis/src/services/workflow-orchestrator.ts index 43788f11884bd..e56384b6fadb4 100644 --- a/packages/modules/workflow-engine-redis/src/services/workflow-orchestrator.ts +++ b/packages/modules/workflow-engine-redis/src/services/workflow-orchestrator.ts @@ -239,6 +239,7 @@ export class WorkflowOrchestratorService { const hasFinished = ret.transaction.hasFinished() const metadata = ret.transaction.getFlow().metadata const { parentStepIdempotencyKey } = metadata ?? {} + const hasFailed = [ TransactionState.REVERTED, TransactionState.FAILED, diff --git a/packages/modules/workflow-engine-redis/src/services/workflows-module.ts b/packages/modules/workflow-engine-redis/src/services/workflows-module.ts index 4421f659e20f1..c4578270d6a7e 100644 --- a/packages/modules/workflow-engine-redis/src/services/workflows-module.ts +++ b/packages/modules/workflow-engine-redis/src/services/workflows-module.ts @@ -75,7 +75,9 @@ export class WorkflowsModuleService< await this.clearExpiredExecutions() this.clearTimeout_ = setInterval(async () => { - await this.clearExpiredExecutions() + try { + await this.clearExpiredExecutions() + } catch {} }, 1000 * 60 * 60) }, } diff --git a/packages/modules/workflow-engine-redis/src/utils/workflow-orchestrator-storage.ts b/packages/modules/workflow-engine-redis/src/utils/workflow-orchestrator-storage.ts index 8365160509550..3e1c3725d484f 100644 --- a/packages/modules/workflow-engine-redis/src/utils/workflow-orchestrator-storage.ts +++ b/packages/modules/workflow-engine-redis/src/utils/workflow-orchestrator-storage.ts @@ -4,7 +4,9 @@ import { IDistributedSchedulerStorage, IDistributedTransactionStorage, SchedulerOptions, + SkipExecutionError, TransactionCheckpoint, + TransactionFlow, TransactionOptions, TransactionStep, } from "@medusajs/framework/orchestration" @@ -13,6 +15,7 @@ import { MedusaError, promiseAll, TransactionState, + TransactionStepState, } from "@medusajs/framework/utils" import { WorkflowOrchestratorService } from "@services" import { Queue, Worker } from "bullmq" @@ -263,6 +266,11 @@ export class RedisDistributedTransactionStorage const { retentionTime, idempotent } = options ?? {} + await this.#preventRaceConditionExecutionIfNecessary({ + data, + key, + }) + if (hasFinished) { Object.assign(data, { retention_time: retentionTime, @@ -457,4 +465,97 @@ export class RedisDistributedTransactionStorage repeatableJobs.map((job) => this.jobQueue?.removeRepeatableByKey(job.key)) ) } + + async #preventRaceConditionExecutionIfNecessary({ + data, + key, + }: { + data: TransactionCheckpoint + key: string + }) { + /** + * In case many execution can succeed simultaneously, we need to ensure that the latest + * execution does continue if a previous execution is considered finished + */ + const currentFlow = data.flow + const { flow: latestUpdatedFlow } = + (await this.get(key)) ?? ({ flow: {} } as { flow: TransactionFlow }) + + const currentFlowLastInvokingStepIndex = Object.values( + currentFlow.steps + ).findIndex((step) => { + return [ + TransactionStepState.INVOKING, + TransactionStepState.NOT_STARTED, + ].includes(step.invoke?.state) + }) + + const latestUpdatedFlowLastInvokingStepIndex = !latestUpdatedFlow.steps + ? 1 // There is no other execution, so the current execution is the latest + : Object.values( + (latestUpdatedFlow.steps as Record) ?? {} + ).findIndex((step) => { + return [ + TransactionStepState.INVOKING, + TransactionStepState.NOT_STARTED, + ].includes(step.invoke?.state) + }) + + const currentFlowLastCompensatingStepIndex = Object.values( + currentFlow.steps + ) + .reverse() + .findIndex((step) => { + return [ + TransactionStepState.COMPENSATING, + TransactionStepState.NOT_STARTED, + ].includes(step.compensate?.state) + }) + + const latestUpdatedFlowLastCompensatingStepIndex = !latestUpdatedFlow.steps + ? -1 + : Object.values( + (latestUpdatedFlow.steps as Record) ?? {} + ) + .reverse() + .findIndex((step) => { + return [ + TransactionStepState.COMPENSATING, + TransactionStepState.NOT_STARTED, + ].includes(step.compensate?.state) + }) + + const isLatestExecutionFinishedIndex = -1 + const invokeShouldBeSkipped = + (latestUpdatedFlowLastInvokingStepIndex === + isLatestExecutionFinishedIndex || + currentFlowLastInvokingStepIndex < + latestUpdatedFlowLastInvokingStepIndex) && + currentFlowLastInvokingStepIndex !== isLatestExecutionFinishedIndex + + const compensateShouldBeSkipped = + currentFlowLastCompensatingStepIndex < + latestUpdatedFlowLastCompensatingStepIndex && + currentFlowLastCompensatingStepIndex !== isLatestExecutionFinishedIndex && + latestUpdatedFlowLastCompensatingStepIndex !== + isLatestExecutionFinishedIndex + + if ( + (data.flow.state !== TransactionState.COMPENSATING && + invokeShouldBeSkipped) || + (data.flow.state === TransactionState.COMPENSATING && + compensateShouldBeSkipped) || + (latestUpdatedFlow.state === TransactionState.COMPENSATING && + ![TransactionState.REVERTED, TransactionState.FAILED].includes( + currentFlow.state + ) && + currentFlow.state !== latestUpdatedFlow.state) || + (latestUpdatedFlow.state === TransactionState.REVERTED && + currentFlow.state !== TransactionState.REVERTED) || + (latestUpdatedFlow.state === TransactionState.FAILED && + currentFlow.state !== TransactionState.FAILED) + ) { + throw new SkipExecutionError("Already finished by another execution") + } + } }