From b6ba18a344686a930b5b4d81b500533efded69a9 Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 21 Jan 2025 20:28:45 +0530 Subject: [PATCH 1/9] modify translator shutdown api: to directly shutdown rather than providing shutdown handler --- roles/translator/src/lib/mod.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/roles/translator/src/lib/mod.rs b/roles/translator/src/lib/mod.rs index 5a36d1db38..eb77f8f0bc 100644 --- a/roles/translator/src/lib/mod.rs +++ b/roles/translator/src/lib/mod.rs @@ -78,7 +78,7 @@ impl TranslatorSv2 { // Check all tasks if is_finished() is true, if so exit tokio::spawn({ - let shutdown_signal = self.shutdown(); + let shutdown_signal = self.shutdown.clone(); async move { if tokio::signal::ctrl_c().await.is_ok() { info!("Interrupt received"); @@ -94,7 +94,7 @@ impl TranslatorSv2 { match task_status_.state { State::DownstreamShutdown(err) | State::BridgeShutdown(err) | State::UpstreamShutdown(err) => { error!("SHUTDOWN from: {}", err); - self.shutdown().notify_one(); + self.shutdown(); } State::UpstreamTryReconnect(err) => { error!("Trying to reconnect the Upstream because of: {}", err); @@ -126,7 +126,7 @@ impl TranslatorSv2 { } State::Healthy(msg) => { info!("HEALTHY message: {}", msg); - self.shutdown().notify_one(); + self.shutdown(); } } } else { @@ -288,8 +288,13 @@ impl TranslatorSv2 { task_collector.safe_lock(|t| t.push((task.abort_handle(), "init task".to_string()))); } - pub fn shutdown(&self) -> Arc { - self.shutdown.clone() + /// Closes Translator role and any open connection associated with it. + /// + /// Note that this method will result in a full exit of the running + /// Translator and any open connection most be re-initiated upon new + /// start. + pub fn shutdown(&self) { + self.shutdown.notify_last(); } } From c0f0c493daa98674a3d3e15544c322d3b49cec25 Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Fri, 24 Jan 2025 11:40:07 +0530 Subject: [PATCH 2/9] terminate all child task in translator --- roles/translator/src/lib/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/roles/translator/src/lib/mod.rs b/roles/translator/src/lib/mod.rs index eb77f8f0bc..f2385fb633 100644 --- a/roles/translator/src/lib/mod.rs +++ b/roles/translator/src/lib/mod.rs @@ -131,11 +131,13 @@ impl TranslatorSv2 { } } else { info!("Channel closed"); + kill_tasks(task_collector.clone()); break; // Channel closed } } _ = self.shutdown.notified() => { info!("Shutting down gracefully..."); + kill_tasks(task_collector.clone()); break; } } @@ -294,7 +296,7 @@ impl TranslatorSv2 { /// Translator and any open connection most be re-initiated upon new /// start. pub fn shutdown(&self) { - self.shutdown.notify_last(); + self.shutdown.notify_one(); } } From db083f8088290102cc851d36f6baffaacc94ece1 Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 21 Jan 2025 20:29:12 +0530 Subject: [PATCH 3/9] =?UTF-8?q?=1B[200~add=20unit=20test=20for=20translato?= =?UTF-8?q?r=20shutdown=20api?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- roles/translator/src/lib/mod.rs | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/roles/translator/src/lib/mod.rs b/roles/translator/src/lib/mod.rs index f2385fb633..9bebc3b933 100644 --- a/roles/translator/src/lib/mod.rs +++ b/roles/translator/src/lib/mod.rs @@ -308,3 +308,42 @@ fn kill_tasks(task_collector: Arc>>) { } }); } + +#[cfg(test)] +mod tests { + use super::TranslatorSv2; + use ext_config::{Config, File, FileFormat}; + + use crate::*; + + #[tokio::test] + async fn test_shutdown() { + let config_path = "config-examples/tproxy-config-hosted-pool-example.toml"; + let config: ProxyConfig = match Config::builder() + .add_source(File::new(config_path, FileFormat::Toml)) + .build() + { + Ok(settings) => match settings.try_deserialize::() { + Ok(c) => c, + Err(e) => { + dbg!(&e); + return; + } + }, + Err(e) => { + dbg!(&e); + return; + } + }; + let translator = TranslatorSv2::new(config.clone()); + let cloned = translator.clone(); + tokio::spawn(async move { + cloned.start().await; + }); + translator.shutdown(); + let ip = config.downstream_address.clone(); + let port = config.downstream_port; + let translator_addr = format!("{}:{}", ip, port); + assert!(std::net::TcpListener::bind(translator_addr).is_ok()); + } +} From 29fbb91764bb1896e2483c6ac0a192ea3f5ddaf9 Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 21 Jan 2025 20:31:39 +0530 Subject: [PATCH 4/9] modify jdc shutdown api: to directly shutdown rather than providing shutdown handler add unit test for jdc shutdown api --- roles/jd-client/src/lib/mod.rs | 50 ++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/roles/jd-client/src/lib/mod.rs b/roles/jd-client/src/lib/mod.rs index 307adeec37..52bd9ddf0d 100644 --- a/roles/jd-client/src/lib/mod.rs +++ b/roles/jd-client/src/lib/mod.rs @@ -79,7 +79,7 @@ impl JobDeclaratorClient { let task_collector = Arc::new(Mutex::new(vec![])); tokio::spawn({ - let shutdown_signal = self.shutdown(); + let shutdown_signal = self.shutdown.clone(); async move { if tokio::signal::ctrl_c().await.is_ok() { info!("Interrupt received"); @@ -371,8 +371,14 @@ impl JobDeclaratorClient { .await; } - pub fn shutdown(&self) -> Arc { - self.shutdown.clone() + /// Closes JDC role and any open connection associated with it. + /// + /// Note that this method will result in a full exit of the running + /// jd-client and any open connection most be re-initiated upon new + /// start. + #[allow(dead_code)] + pub fn shutdown(&self) { + self.shutdown.notify_one(); } } @@ -409,3 +415,41 @@ impl PoolChangerTrigger { } } } + +#[cfg(test)] +mod tests { + use ext_config::{Config, File, FileFormat}; + + use crate::*; + + #[tokio::test] + async fn test_shutdown() { + let config_path = "config-examples/jdc-config-hosted-example.toml"; + let config: ProxyConfig = match Config::builder() + .add_source(File::new(config_path, FileFormat::Toml)) + .build() + { + Ok(settings) => match settings.try_deserialize::() { + Ok(c) => c, + Err(e) => { + dbg!(&e); + return; + } + }, + Err(e) => { + dbg!(&e); + return; + } + }; + let jdc = JobDeclaratorClient::new(config.clone()); + let cloned = jdc.clone(); + tokio::spawn(async move { + cloned.start().await; + }); + jdc.shutdown(); + let ip = config.downstream_address.clone(); + let port = config.downstream_port; + let jdc_addr = format!("{}:{}", ip, port); + assert!(std::net::TcpListener::bind(jdc_addr).is_ok()); + } +} From 21b3bc5e0c06a80866c06ec21ecc6b5cca0830df Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 21 Jan 2025 20:36:33 +0530 Subject: [PATCH 5/9] Upon shutdown signalling exit process via loop exit and not via process exit syscall --- roles/jd-client/src/lib/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/jd-client/src/lib/mod.rs b/roles/jd-client/src/lib/mod.rs index 52bd9ddf0d..2437db4283 100644 --- a/roles/jd-client/src/lib/mod.rs +++ b/roles/jd-client/src/lib/mod.rs @@ -170,7 +170,7 @@ impl JobDeclaratorClient { }, _ = self.shutdown.notified().fuse() => { info!("Shutting down gracefully..."); - std::process::exit(0); + break 'outer; } }; } From b631089e96a7d9b7bc071403b22f6523a4060ad5 Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 21 Jan 2025 20:37:31 +0530 Subject: [PATCH 6/9] Handle Root Future Termination in Shutdown Scenarios This is crucial because when the `start` method is executed in a runtime that is not the main one, the lifecycle of futures spawned within the `start` method continues, even after the `jdc` process terminates.The earlier approach of simply exiting the loop worked because the `start` method was executed on the main blocking thread, where the Tokio runtime was defined. However, in the case of integration tests, the runtime runs on a different blocking thread, necessitating proper handling of the root future termination. --- roles/jd-client/src/lib/mod.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/roles/jd-client/src/lib/mod.rs b/roles/jd-client/src/lib/mod.rs index 2437db4283..6c68bd8e48 100644 --- a/roles/jd-client/src/lib/mod.rs +++ b/roles/jd-client/src/lib/mod.rs @@ -93,17 +93,18 @@ impl JobDeclaratorClient { let task_collector = task_collector.clone(); let tx_status = tx_status.clone(); let proxy_config = proxy_config.clone(); + let root_handler; if let Some(upstream) = proxy_config.upstreams.get(upstream_index) { let tx_status = tx_status.clone(); let task_collector = task_collector.clone(); let upstream = upstream.clone(); - tokio::spawn(async move { + root_handler = tokio::spawn(async move { Self::initialize_jd(proxy_config, tx_status, task_collector, upstream).await; }); } else { let tx_status = tx_status.clone(); let task_collector = task_collector.clone(); - tokio::spawn(async move { + root_handler = tokio::spawn(async move { Self::initialize_jd_as_solo_miner( proxy_config, tx_status.clone(), @@ -165,11 +166,27 @@ impl JobDeclaratorClient { } } else { info!("Received unknown task. Shutting down."); + task_collector + .safe_lock(|s| { + for handle in s { + handle.abort(); + } + }) + .unwrap(); + root_handler.abort(); break 'outer; } }, _ = self.shutdown.notified().fuse() => { info!("Shutting down gracefully..."); + task_collector + .safe_lock(|s| { + for handle in s { + handle.abort(); + } + }) + .unwrap(); + root_handler.abort(); break 'outer; } }; From 263183e82f8238ba1a38c1693df111bdad3cc79a Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 21 Jan 2025 20:41:34 +0530 Subject: [PATCH 7/9] remove jd-does-not-panic-if-jdc-disconnect from ci's mg yaml and mg test from test module --- .github/workflows/mg.yaml | 8 -- .../jds-config.toml | 20 ---- ...-jds-do-not-panic-on-close-connection.json | 36 ------- ...-do-not-panic-if-jdc-close-connection.json | 96 ------------------- ...ds-do-not-panic-if-jdc-close-connection.sh | 9 -- 5 files changed, 169 deletions(-) delete mode 100644 test/config/jds-do-not-panic-if-jdc-close-connection/jds-config.toml delete mode 100644 test/message-generator/mock/job-declarator-mock-for-jds-do-not-panic-on-close-connection.json delete mode 100644 test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.json delete mode 100755 test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.sh diff --git a/.github/workflows/mg.yaml b/.github/workflows/mg.yaml index 484c7d4fca..8c81e49e9c 100644 --- a/.github/workflows/mg.yaml +++ b/.github/workflows/mg.yaml @@ -40,14 +40,6 @@ jobs: - name: Run jds-do-not-fail-on-wrong-tsdatasucc run: sh ./test/message-generator/test/jds-do-not-fail-on-wrong-tsdatasucc/jds-do-not-fail-on-wrong-tsdatasucc.sh - jds-do-not-panic-if-jdc-close-connection: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Run jds-do-not-panic-if-jdc-close-connection - run: sh ./test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.sh - jds-do-not-stackoverflow-when-no-token: runs-on: ubuntu-latest steps: diff --git a/test/config/jds-do-not-panic-if-jdc-close-connection/jds-config.toml b/test/config/jds-do-not-panic-if-jdc-close-connection/jds-config.toml deleted file mode 100644 index acda73227c..0000000000 --- a/test/config/jds-do-not-panic-if-jdc-close-connection/jds-config.toml +++ /dev/null @@ -1,20 +0,0 @@ -# SRI Pool config -authority_public_key = "9auqWEzQDVyd2oe1JVGFLMLHZtCo2FFqZwtKA5gd9xbuEu7PH72" -authority_secret_key = "mkDLTBBRxdBv998612qipDYoTK3YUrqLe8uWw7gu3iXbSrn2n" -cert_validity_sec = 3600 - -# list of compressed or uncompressed pubkeys for coinbase payout (only supports 1 item in the array at this point) -coinbase_outputs = [ - { output_script_type = "P2WPKH", output_script_value = "036adc3bdf21e6f9a0f0fb0066bf517e5b7909ed1563d6958a10993849a7554075" }, -] - -listen_jd_address = "127.0.0.1:34264" - -core_rpc_url = "" -core_rpc_port = 18332 -core_rpc_user = "" -core_rpc_pass = "" -# Time interval used for JDS mempool update -[mempool_update_interval] -unit = "secs" -value = 1 \ No newline at end of file diff --git a/test/message-generator/mock/job-declarator-mock-for-jds-do-not-panic-on-close-connection.json b/test/message-generator/mock/job-declarator-mock-for-jds-do-not-panic-on-close-connection.json deleted file mode 100644 index 58f1f5debe..0000000000 --- a/test/message-generator/mock/job-declarator-mock-for-jds-do-not-panic-on-close-connection.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "version": "2", - "doc": [ - "This test does", - "Soft mock of JD", - "Sends SetupConnection to the pool and waits for .Success" - ], - "frame_builders": [ - { - "type": "automatic", - "message_id": "test/message-generator/messages/common_messages.json::setup_connection_job_declarator" - } - ], - "actions": [ - { - "message_ids": ["setup_connection_job_declarator"], - "role": "client", - "results": [ - { - "type": "match_message_type", - "value": "0x01" - } - ], - "actiondoc": "This action sends SetupConnection and checks that .Success and SetCoinbase is received" - } - ], - "setup_commands": [], - "execution_commands": [], - "cleanup_commands": [], - "role": "client", - "downstream": { - "ip": "127.0.0.1", - "port": 34264, - "pub_key": "9auqWEzQDVyd2oe1JVGFLMLHZtCo2FFqZwtKA5gd9xbuEu7PH72" - } -} diff --git a/test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.json b/test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.json deleted file mode 100644 index 5da3018cd9..0000000000 --- a/test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "version": "2", - "doc": [ - "This test does", - "Launch the jd-server", - "Connect JDC", - "Terminate JDC", - "Connect JDC again", - "JDS msut be up when the second JDC do connect" - ], - "frame_builders": [ - ], - "actions": [ - ], - "setup_commands": [ - { - "command": "cargo", - "args": [ - "llvm-cov", - "--no-report", - "run", - "-p", - "jd_server", - "--", - "-c", - "../test/config/jds-do-not-panic-if-jdc-close-connection/jds-config.toml" - ], - "conditions": { - "WithConditions": { - "conditions": [ - { - "output_string": "JD INITIALIZED", - "output_location": "StdOut", - "late_condition": false, - "condition": true - } - ], - "timer_secs": 300, - "warn_no_panic": false - } - } - }, - { - "command": "cargo", - "args": [ - "run", - "../../test/message-generator/mock/job-declarator-mock-for-jds-do-not-panic-on-close-connection.json" - ], - "conditions": { - "WithConditions": { - "conditions": [ - { - "output_string": "MATCHED MESSAGE TYPE 1", - "output_location": "StdOut", - "late_condition": false, - "condition": true - } - ], - "timer_secs": 600, - "warn_no_panic": false - } - } - }, - { - "command": "cargo", - "args": [ - "run", - "../../test/message-generator/mock/job-declarator-mock-for-jds-do-not-panic-on-close-connection.json" - ], - "conditions": { - "WithConditions": { - "conditions": [ - { - "output_string": "MATCHED MESSAGE TYPE 1", - "output_location": "StdOut", - "late_condition": false, - "condition": true - } - ], - "timer_secs": 600, - "warn_no_panic": false - } - } - } - ], - "execution_commands": [ - ], - "cleanup_commands": [ - { - "command": "pkill", - "args": ["-f", "jd_server", "-SIGINT"], - "conditions": "None" - } - ], - "role": "none" -} diff --git a/test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.sh b/test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.sh deleted file mode 100755 index 706f13a759..0000000000 --- a/test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.sh +++ /dev/null @@ -1,9 +0,0 @@ -cd roles -cargo llvm-cov --no-report -p jd_server - -cd ../utils/message-generator/ -cargo build - -RUST_LOG=debug cargo run ../../test/message-generator/test/jds-do-not-panic-if-jdc-close-connection/jds-do-not-panic-if-jdc-close-connection.json || { echo 'mg test failed' ; exit 1; } - -sleep 10 From 47fb1286acb90439ce1552835b5c4e3c57b11744 Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 21 Jan 2025 22:05:17 +0530 Subject: [PATCH 8/9] add jds_should_not_panic_if_jdc_shutsdown integration test --- .../tests-integration/tests/jd_integration.rs | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 roles/tests-integration/tests/jd_integration.rs diff --git a/roles/tests-integration/tests/jd_integration.rs b/roles/tests-integration/tests/jd_integration.rs new file mode 100644 index 0000000000..58b98d0989 --- /dev/null +++ b/roles/tests-integration/tests/jd_integration.rs @@ -0,0 +1,37 @@ +use integration_tests_sv2::*; + +use roles_logic_sv2::parsers::{CommonMessages, PoolMessages}; + +// This test verifies that the `jds` (Job Distributor Server) does not panic when the `jdc` +// (Job Distributor Client) shuts down. +// +// The test follows these steps: +// 1. Start a Template Provider (`tp`) and a Pool. +// 2. Start the `jds` and the `jdc`, ensuring the `jdc` connects to the `jds`. +// 3. Shut down the `jdc` and ensure the `jds` remains operational without panicking. +// 4. Verify that the `jdc`'s address can be reused by asserting that a new `TcpListener` can bind +// to the same address. +// 5. Start a Sniffer as a proxy for observing messages exchanged between the `jds` and other +// components. +// 6. Reconnect a new `jdc` to the Sniffer and ensure the expected `SetupConnection` message is +// received from the `jdc`. +// +// This ensures that the shutdown of the `jdc` is handled gracefully by the `jds`, and subsequent +// connections or operations continue without issues. +// +// # Notes +// - The test waits for a brief duration (`1 second`) after shutting down the `jdc` to allow for any +// internal cleanup or reconnection attempts. +#[tokio::test] +async fn jds_should_not_panic_if_jdc_shutsdown() { + let (_tp, tp_addr) = start_template_provider(None).await; + let (_pool, pool_addr) = start_pool(Some(tp_addr)).await; + let (_jds, jds_addr) = start_jds(tp_addr).await; + let (jdc, jdc_addr) = start_jdc(pool_addr, tp_addr, jds_addr).await; + jdc.shutdown(); + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + assert!(tokio::net::TcpListener::bind(jdc_addr).await.is_ok()); + let (sniffer, sniffer_addr) = start_sniffer("0".to_string(), jds_addr, false, None).await; + let (_jdc, _jdc_addr) = start_jdc(pool_addr, tp_addr, sniffer_addr).await; + assert_common_message!(sniffer.next_message_from_downstream(), SetupConnection); +} From 1fe23956c3f200ac0141adf36ff73a49a921278a Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Sat, 1 Feb 2025 07:38:49 +0530 Subject: [PATCH 9/9] remove jds-do-not-panic-if-jdc-close-connection from aggregate result --- .github/workflows/mg.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/mg.yaml b/.github/workflows/mg.yaml index 8c81e49e9c..c0969326e3 100644 --- a/.github/workflows/mg.yaml +++ b/.github/workflows/mg.yaml @@ -140,7 +140,6 @@ jobs: interop-proxy-with-multi-ups, interop-proxy-with-multi-ups-extended, jds-do-not-fail-on-wrong-tsdatasucc, - jds-do-not-panic-if-jdc-close-connection, jds-do-not-stackoverflow-when-no-token, jds-receive-solution-while-processing-declared-job, pool-sri-test-1-standard, @@ -159,7 +158,6 @@ jobs: [ "${{ needs.interop-proxy-with-multi-ups.result }}" != "success" ] || [ "${{ needs.interop-proxy-with-multi-ups-extended.result }}" != "success" ] || [ "${{ needs.jds-do-not-fail-on-wrong-tsdatasucc.result }}" != "success" ] || - [ "${{ needs.jds-do-not-panic-if-jdc-close-connection.result }}" != "success" ] || [ "${{ needs.jds-do-not-stackoverflow-when-no-token.result }}" != "success" ] || [ "${{ needs.jds-receive-solution-while-processing-declared-job.result }}" != "success" ] || [ "${{ needs.pool-sri-test-1-standard.result }}" != "success" ] ||