diff --git a/runtime/oti/j9nonbuilder.h b/runtime/oti/j9nonbuilder.h index 72424ab198c..9b77e7803cc 100644 --- a/runtime/oti/j9nonbuilder.h +++ b/runtime/oti/j9nonbuilder.h @@ -4229,6 +4229,7 @@ typedef struct J9CRIUCheckpointState { I_64 checkpointRestoreTimeDelta; I_64 lastRestoreTimeMillis; UDATA maxRetryForNotCheckpointSafe; + UDATA sleepMillisecondsForNotCheckpointSafe; jclass criuJVMCheckpointExceptionClass; jclass criuSystemCheckpointExceptionClass; jclass criuJVMRestoreExceptionClass; diff --git a/runtime/oti/jvminit.h b/runtime/oti/jvminit.h index cec42f40626..5bfa34eb4e6 100644 --- a/runtime/oti/jvminit.h +++ b/runtime/oti/jvminit.h @@ -435,6 +435,8 @@ enum INIT_STAGE { #define VMOPT_XSHARECLASSES_DISABLEONRESTORE "-Xshareclasses:disableOnRestore" #define VMOPT_XXENABLETHROWONDELAYECHECKPOINTOPERATION "-XX:+ThrowOnDelayedCheckpointOperation" #define VMOPT_XXDISABLETHROWONDELAYECHECKPOINTOPERATION "-XX:-ThrowOnDelayedCheckpointOperation" +#define VMOPT_XXMAXRETRYFORNOTCHECKPOINTSAFE_EQUALS "-XX:maxRetryForNotCheckpointSafe=" +#define VMOPT_XXSLEEPMILLISECONDSFORNOTCHECKPOINTSAFE_EQUALS "-XX:sleepMillisecondsForNotCheckpointSafe=" #endif /* defined(J9VM_OPT_CRIU_SUPPORT) */ /* Compatibility options. */ diff --git a/runtime/vm/CRIUHelpers.cpp b/runtime/vm/CRIUHelpers.cpp index 47822110881..a5a1d1df79c 100644 --- a/runtime/vm/CRIUHelpers.cpp +++ b/runtime/vm/CRIUHelpers.cpp @@ -1498,6 +1498,7 @@ criuCheckpointJVMImpl(JNIEnv *env, UDATA success = 0; bool safePoint = J9_ARE_ANY_BITS_SET(vm->extendedRuntimeFlags, J9_EXTENDED_RUNTIME_OSR_SAFE_POINT); UDATA maxRetries = vm->checkpointState.maxRetryForNotCheckpointSafe; + UDATA sleepMilliseconds = vm->checkpointState.sleepMillisecondsForNotCheckpointSafe; BOOLEAN syslogFlagNone = TRUE; char *syslogOptions = NULL; I_32 syslogBufferSize = 0; @@ -1652,7 +1653,7 @@ criuCheckpointJVMImpl(JNIEnv *env, for (UDATA i = 0; (0 != notSafeToCheckpoint) && (i <= maxRetries); i++) { releaseSafeOrExcusiveVMAccess(currentThread, vmFuncs, safePoint); vmFuncs->internalExitVMToJNI(currentThread); - omrthread_nanosleep(10000); + omrthread_sleep(sleepMilliseconds); vmFuncs->internalEnterVMFromJNI(currentThread); acquireSafeOrExcusiveVMAccess(currentThread, vmFuncs, safePoint); notSafeToCheckpoint = checkIfSafeToCheckpoint(currentThread); diff --git a/runtime/vm/jvminit.c b/runtime/vm/jvminit.c index 8dd5932713e..b83c084c02e 100644 --- a/runtime/vm/jvminit.c +++ b/runtime/vm/jvminit.c @@ -2760,6 +2760,37 @@ VMInitStages(J9JavaVM *vm, IDATA stage, void* reserved) argIndex2 = FIND_NEXT_ARG_IN_VMARGS_FORWARD(STARTSWITH_MATCH, VMOPT_XXGLOBALLOCKRESERVATIONCOLON, NULL, argIndex2); } +#if defined(J9VM_OPT_CRIU_SUPPORT) + vm->checkpointState.maxRetryForNotCheckpointSafe = 100; + if ((argIndex = FIND_AND_CONSUME_VMARG(STARTSWITH_MATCH, VMOPT_XXMAXRETRYFORNOTCHECKPOINTSAFE_EQUALS, NULL)) >= 0) { + UDATA maxRetryForNotCheckpointSafe = 0; + char *optname = VMOPT_XXMAXRETRYFORNOTCHECKPOINTSAFE_EQUALS; + parseError = GET_INTEGER_VALUE(argIndex, optname, maxRetryForNotCheckpointSafe); + if (OPTION_OK != parseError) { + parseErrorOption = VMOPT_XXMAXRETRYFORNOTCHECKPOINTSAFE_EQUALS; + goto _memParseError; + } + vm->checkpointState.maxRetryForNotCheckpointSafe = maxRetryForNotCheckpointSafe; + } + + vm->checkpointState.sleepMillisecondsForNotCheckpointSafe = 10; + if ((argIndex = FIND_AND_CONSUME_VMARG(STARTSWITH_MATCH, VMOPT_XXSLEEPMILLISECONDSFORNOTCHECKPOINTSAFE_EQUALS, NULL)) >= 0) { + UDATA sleepMillisecondsForNotCheckpointSafe = 0; + char *optname = VMOPT_XXSLEEPMILLISECONDSFORNOTCHECKPOINTSAFE_EQUALS; + parseError = GET_INTEGER_VALUE(argIndex, optname, sleepMillisecondsForNotCheckpointSafe); + if (OPTION_OK != parseError) { + parseErrorOption = VMOPT_XXSLEEPMILLISECONDSFORNOTCHECKPOINTSAFE_EQUALS; + goto _memParseError; + } + if (sleepMillisecondsForNotCheckpointSafe < 1) { + parseErrorOption = VMOPT_XXSLEEPMILLISECONDSFORNOTCHECKPOINTSAFE_EQUALS; + parseError = OPTION_OUTOFRANGE; + goto _memParseError; + } + vm->checkpointState.sleepMillisecondsForNotCheckpointSafe = sleepMillisecondsForNotCheckpointSafe; + } +#endif /* defined(J9VM_OPT_CRIU_SUPPORT) */ + break; case BYTECODE_TABLE_SET: @@ -3924,8 +3955,6 @@ processVMArgsFromFirstToLast(J9JavaVM * vm) } vm->checkpointState.lastRestoreTimeMillis = -1; - /* Its unclear if we need an option for this, so we can keep the init here for the time being */ - vm->checkpointState.maxRetryForNotCheckpointSafe = 100; #endif /* defined(J9VM_OPT_CRIU_SUPPORT) */ { diff --git a/test/functional/cmdLineTests/criu/criu_nonPortable.xml b/test/functional/cmdLineTests/criu/criu_nonPortable.xml index 61fcc2b87d7..ecb48733b09 100644 --- a/test/functional/cmdLineTests/criu/criu_nonPortable.xml +++ b/test/functional/cmdLineTests/criu/criu_nonPortable.xml @@ -292,9 +292,8 @@ Could not dump the JVM processes, err=-70 - Thread pid mismatch do not match expected Unable to create a thread: TEST FAILED - In the past, the failure below was caused by an issue where CRIU can't be found on the PATH. + Could not dump the JVM processes, err=-70 ---> + bash $SCRIPPATH$ $TEST_RESROOT$ $JAVA_COMMAND$ "$JVM_OPTIONS$ -XX:+ThrowOnDelayedCheckpointOperation -Xdump:system:events=user -Xtrace:print=j9vm.731 --add-opens java.base/jdk.internal.misc=ALL-UNNAMED --add-exports java.base/openj9.internal.criu=ALL-UNNAMED" $MAINCLASS_DEADLOCK_TEST$ ClinitTest 1 User requested Java dump using diff --git a/test/functional/cmdLineTests/criu/playlist.xml b/test/functional/cmdLineTests/criu/playlist.xml index 9f186d56749..6960d63a6aa 100644 --- a/test/functional/cmdLineTests/criu/playlist.xml +++ b/test/functional/cmdLineTests/criu/playlist.xml @@ -110,6 +110,10 @@ https://github.com/eclipse-openj9/openj9/issues/18468 ppc64le.* + + https://github.com/eclipse-openj9/openj9/issues/18570 + s390x.* + TR_Options=$(Q)exclude={org/openj9/criu/TimeChangeTest.nanoTimeInt()J},dontInline={org/openj9/criu/TimeChangeTest.nanoTimeInt()J|org/openj9/criu/TimeChangeTest.nanoTimeJit()J},{org/openj9/criu/TimeChangeTest.nanoTimeJit()J}(count=1)$(Q) \ diff --git a/test/functional/cmdLineTests/criu/src/org/openj9/criu/DeadlockTest.java b/test/functional/cmdLineTests/criu/src/org/openj9/criu/DeadlockTest.java index e76f32c127f..ca9fe52ece7 100644 --- a/test/functional/cmdLineTests/criu/src/org/openj9/criu/DeadlockTest.java +++ b/test/functional/cmdLineTests/criu/src/org/openj9/criu/DeadlockTest.java @@ -76,7 +76,7 @@ public static void checkpointDeadlock() { Thread t1 = new Thread(() -> { synchronized (lock) { - testResult.lockStatus = 1; + testResult.lockStatus.set(1); try { Thread.sleep(20000); } catch (InterruptedException e) { @@ -95,7 +95,7 @@ public static void checkpointDeadlock() { } }); - while (testResult.lockStatus == 0) { + while (testResult.lockStatus.get() == 0) { Thread.yield(); } @@ -143,7 +143,7 @@ public static void notCheckpointSafeDeadlock() { Thread t1 = new Thread(() -> { Runnable run = () -> { synchronized (lock) { - testResult.lockStatus = 1; + testResult.lockStatus.set(1); try { Thread.sleep(20000); } catch (InterruptedException e) { @@ -160,7 +160,7 @@ public static void notCheckpointSafeDeadlock() { CRIUSupport criuSupport = new CRIUSupport(path); - while (testResult.lockStatus == 0) { + while (testResult.lockStatus.get() == 0) { Thread.yield(); } @@ -187,7 +187,7 @@ public static void methodTypeDeadlockTest() { Path path = Paths.get("cpData"); final TestResult testResult = new TestResult(true, 0); Runnable run = () -> { - testResult.lockStatus++; + testResult.lockStatus.incrementAndGet(); for (int i = 0; i < 30; i++) { URL[] urlArray = { A.class.getProtectionDomain().getCodeSource().getLocation() }; URLClassLoader loader = new URLClassLoader(urlArray); @@ -203,7 +203,7 @@ public static void methodTypeDeadlockTest() { thread.start(); } - while (testResult.lockStatus < 5) { + while (testResult.lockStatus.get() < 5) { Thread.yield(); } @@ -258,7 +258,7 @@ public static void clinitTest() { Path path = Paths.get("cpData"); mainTestResult.testPassed = false; - mainTestResult.lockStatus = 0; + mainTestResult.lockStatus.set(0); Thread t1 = new Thread(()->{ new ClinitDeadlock(); @@ -266,7 +266,7 @@ public static void clinitTest() { t1.start(); - while (mainTestResult.lockStatus == 0) { + while (mainTestResult.lockStatus.get() == 0) { Thread.yield(); } @@ -295,7 +295,7 @@ public static void clinitTest2() { Path path = Paths.get("cpData"); mainTestResult.testPassed = false; - mainTestResult.lockStatus = 0; + mainTestResult.lockStatus.set(0); Thread t1 = new Thread(()->{ new ClinitDeadlock(); @@ -303,7 +303,7 @@ public static void clinitTest2() { t1.start(); - while (mainTestResult.lockStatus == 0) { + while (mainTestResult.lockStatus.get() == 0) { Thread.yield(); } @@ -331,7 +331,7 @@ public static void clinitTest2() { static class ClinitDeadlock { static { - mainTestResult.lockStatus = 1; + mainTestResult.lockStatus.set(1); synchronized(lock) { try { System.out.println("Thread waiting"); diff --git a/test/functional/cmdLineTests/criu/src/org/openj9/criu/JDK11UpTimeoutAdjustmentTest.java b/test/functional/cmdLineTests/criu/src/org/openj9/criu/JDK11UpTimeoutAdjustmentTest.java index 3391131cb9f..512adc66106 100644 --- a/test/functional/cmdLineTests/criu/src/org/openj9/criu/JDK11UpTimeoutAdjustmentTest.java +++ b/test/functional/cmdLineTests/criu/src/org/openj9/criu/JDK11UpTimeoutAdjustmentTest.java @@ -78,7 +78,7 @@ private void test(String testName) throws InterruptedException { throw new RuntimeException("Unrecognized test name: " + testName); } - while (testResult.lockStatus == 0) { + while (testResult.lockStatus.get() == 0) { Thread.currentThread().yield(); } CRIUTestUtils.checkPointJVMNoSetup(criu, CRIUTestUtils.imagePath, false); @@ -138,7 +138,7 @@ public static void showMessages(String logStr, long expectedTime, boolean isMill private void testThreadParkHelper(String testName) { CRIUTestUtils.showThreadCurrentTime(testName + " before park()"); final long startNanoTime = System.nanoTime(); - testResult.lockStatus = 1; + testResult.lockStatus.set(1); unsafe.park(false, nsTime5s); final long endNanoTime = System.nanoTime(); CRIUTestUtils.showThreadCurrentTime(testName + " after park()"); @@ -160,7 +160,7 @@ private void testThreadSleepHelper(String testName) { CRIUTestUtils.showThreadCurrentTime(testName + " before sleep()"); final long startNanoTime = System.nanoTime(); try { - testResult.lockStatus = 1; + testResult.lockStatus.set(1); Thread.sleep(msTime5s); final long endNanoTime = System.nanoTime(); CRIUTestUtils.showThreadCurrentTime(testName + " after sleep()"); @@ -191,7 +191,7 @@ public void run() { final long startNanoTime; synchronized (objWait) { startNanoTime = System.nanoTime(); - testResult.lockStatus = 1; + testResult.lockStatus.set(1); objWait.wait(); } final long endNanoTime = System.nanoTime(); @@ -220,7 +220,7 @@ private void testObjectWaitTimedHelper(String testName, long ms, int ns) { final long startNanoTime; synchronized (objWait) { startNanoTime = System.nanoTime(); - testResult.lockStatus = 1; + testResult.lockStatus.set(1); objWait.wait(ms, ns); } final long endNanoTime = System.nanoTime(); diff --git a/test/functional/cmdLineTests/criu/src/org/openj9/criu/TestConcurrentMode.java b/test/functional/cmdLineTests/criu/src/org/openj9/criu/TestConcurrentMode.java index bb95ad0c4ed..f5cbc78f03d 100644 --- a/test/functional/cmdLineTests/criu/src/org/openj9/criu/TestConcurrentMode.java +++ b/test/functional/cmdLineTests/criu/src/org/openj9/criu/TestConcurrentMode.java @@ -118,45 +118,45 @@ static void TestConcurrentModePreCheckpointHookPriorities() { criu.registerPreCheckpointHook(() -> { CRIUTestUtils.showThreadCurrentTime("The preCheckpointHook with lower priority in CONCURRENT_MODE"); // check if it is the initial value - if (testResult.lockStatus != 0) { + if (testResult.lockStatus.get() != 0) { testResult.testPassed = false; CRIUTestUtils.showThreadCurrentTime("The preCheckpointHook with lower priority in CONCURRENT_MODE failed with testResult.lockStatus = " - + testResult.lockStatus); + + testResult.lockStatus.get()); } else { - testResult.lockStatus = 1; + testResult.lockStatus.set(1); } }, CRIUSupport.HookMode.CONCURRENT_MODE, USER_HOOK_MODE_PRIORITY_LOW); criu.registerPreCheckpointHook(() -> { CRIUTestUtils.showThreadCurrentTime("The preCheckpointHook with higher priority in CONCURRENT_MODE"); // check if it is the value set by the hook with USER_HOOK_MODE_PRIORITY_LOW in CONCURRENT_MODE - if (testResult.lockStatus != 1) { + if (testResult.lockStatus.get() != 1) { testResult.testPassed = false; CRIUTestUtils.showThreadCurrentTime("The preCheckpointHook with higher priority in CONCURRENT_MODE failed with testResult.lockStatus = " - + testResult.lockStatus); + + testResult.lockStatus.get()); } else { - testResult.lockStatus = 2; + testResult.lockStatus.set(2); } }, CRIUSupport.HookMode.CONCURRENT_MODE, USER_HOOK_MODE_PRIORITY_HIGH); criu.registerPreCheckpointHook(() -> { CRIUTestUtils.showThreadCurrentTime("The preCheckpointHook with lower priority in SINGLE_THREAD_MODE"); // check if it is the value set by the hook with USER_HOOK_MODE_PRIORITY_HIGH in CONCURRENT_MODE - if (testResult.lockStatus != 2) { + if (testResult.lockStatus.get() != 2) { testResult.testPassed = false; CRIUTestUtils.showThreadCurrentTime("The preCheckpointHook with lower priority in SINGLE_THREAD_MODE failed with testResult.lockStatus = " - + testResult.lockStatus); + + testResult.lockStatus.get()); } else { - testResult.lockStatus = 3; + testResult.lockStatus.set(3); } }, CRIUSupport.HookMode.SINGLE_THREAD_MODE, USER_HOOK_MODE_PRIORITY_LOW); criu.registerPreCheckpointHook(() -> { CRIUTestUtils.showThreadCurrentTime("The preCheckpointHook with higher priority in SINGLE_THREAD_MODE"); // check if it is the value set by the hook with USER_HOOK_MODE_PRIORITY_LOW in SINGLE_THREAD_MODE - if (testResult.lockStatus != 3) { + if (testResult.lockStatus.get() != 3) { testResult.testPassed = false; CRIUTestUtils.showThreadCurrentTime("The preCheckpointHook with higher priority in SINGLE_THREAD_MODE failed with testResult.lockStatus = " - + testResult.lockStatus); + + testResult.lockStatus.get()); } else { - testResult.lockStatus = 4; + testResult.lockStatus.set(4); } }, CRIUSupport.HookMode.SINGLE_THREAD_MODE, USER_HOOK_MODE_PRIORITY_HIGH); @@ -217,45 +217,45 @@ static void TestConcurrentModePostRestoreHookPriorities() { criu.registerPostRestoreHook(() -> { CRIUTestUtils.showThreadCurrentTime("The postRestoreHook with lower priority in CONCURRENT_MODE"); // check if it is the value set by the hook with USER_HOOK_MODE_PRIORITY_LOW in CONCURRENT_MODE - if (testResult.lockStatus != 3) { + if (testResult.lockStatus.get() != 3) { testResult.testPassed = false; CRIUTestUtils.showThreadCurrentTime("The postRestoreHook with lower priority in CONCURRENT_MODE failed with testResult.lockStatus = " - + testResult.lockStatus); + + testResult.lockStatus.get()); } else { - testResult.lockStatus = 4; + testResult.lockStatus.set(4); } }, CRIUSupport.HookMode.CONCURRENT_MODE, USER_HOOK_MODE_PRIORITY_LOW); criu.registerPostRestoreHook(() -> { CRIUTestUtils.showThreadCurrentTime("The postRestoreHook with higher priority in CONCURRENT_MODE"); // check if it is the value set by the hook with USER_HOOK_MODE_PRIORITY_LOW in SINGLE_THREAD_MODE - if (testResult.lockStatus != 2) { + if (testResult.lockStatus.get() != 2) { testResult.testPassed = false; CRIUTestUtils.showThreadCurrentTime("The postRestoreHook with higher priority in CONCURRENT_MODE failed with testResult.lockStatus = " - + testResult.lockStatus); + + testResult.lockStatus.get()); } else { - testResult.lockStatus = 3; + testResult.lockStatus.set(3); } }, CRIUSupport.HookMode.CONCURRENT_MODE, USER_HOOK_MODE_PRIORITY_HIGH); criu.registerPostRestoreHook(() -> { CRIUTestUtils.showThreadCurrentTime("The postRestoreHook with lower priority in SINGLE_THREAD_MODE"); // check if it is the value set by the hook with USER_HOOK_MODE_PRIORITY_HIGH in SINGLE_THREAD_MODE - if (testResult.lockStatus != 1) { + if (testResult.lockStatus.get() != 1) { testResult.testPassed = false; CRIUTestUtils.showThreadCurrentTime("The postRestoreHook with lower priority in SINGLE_THREAD_MODE failed with testResult.lockStatus = " - + testResult.lockStatus); + + testResult.lockStatus.get()); } else { - testResult.lockStatus = 2; + testResult.lockStatus.set(2); } }, CRIUSupport.HookMode.SINGLE_THREAD_MODE, USER_HOOK_MODE_PRIORITY_LOW); criu.registerPostRestoreHook(() -> { CRIUTestUtils.showThreadCurrentTime("The postRestoreHook with higher priority in SINGLE_THREAD_MODE"); // check if it is the initial value - if (testResult.lockStatus != 0) { + if (testResult.lockStatus.get() != 0) { testResult.testPassed = false; CRIUTestUtils.showThreadCurrentTime("The postRestoreHook with higher priority in SINGLE_THREAD_MODE failed with testResult.lockStatus = " - + testResult.lockStatus); + + testResult.lockStatus.get()); } else { - testResult.lockStatus = 1; + testResult.lockStatus.set(1); } }, CRIUSupport.HookMode.SINGLE_THREAD_MODE, USER_HOOK_MODE_PRIORITY_HIGH); diff --git a/test/functional/cmdLineTests/criu/src/org/openj9/criu/TestResult.java b/test/functional/cmdLineTests/criu/src/org/openj9/criu/TestResult.java index 5a076ff3db6..3b41c02686d 100644 --- a/test/functional/cmdLineTests/criu/src/org/openj9/criu/TestResult.java +++ b/test/functional/cmdLineTests/criu/src/org/openj9/criu/TestResult.java @@ -21,12 +21,14 @@ *******************************************************************************/ package org.openj9.criu; +import java.util.concurrent.atomic.AtomicInteger; + public class TestResult { boolean testPassed; - volatile int lockStatus; + AtomicInteger lockStatus; TestResult(boolean testPassed, int lockStatus) { this.testPassed = testPassed; - this.lockStatus = lockStatus; + this.lockStatus = new AtomicInteger(lockStatus); } }