Skip to content

Commit 495eb47

Browse files
committed
[tmpnet] Deploy collectors with golang to simplify cross-repo use
Previously, prometheus and promtail were installed and launched with bash scripts. Migrating installation to nix and launch to golang enables directly sharing the functionality with subnet-evm and hypersdk. No more having to copy and maintain the scripts in multiple repos.
1 parent 0c9fe11 commit 495eb47

File tree

8 files changed

+279
-228
lines changed

8 files changed

+279
-228
lines changed

.github/actions/run-monitored-tmpnet-cmd/action.yml

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,23 +41,6 @@ inputs:
4141
runs:
4242
using: composite
4343
steps:
44-
- name: Start prometheus
45-
# Only run for the original repo; a forked repo won't have access to the monitoring credentials
46-
if: (inputs.prometheus_username != '')
47-
shell: bash
48-
# Assumes calling project has a nix flake that ensures a compatible prometheus
49-
run: nix develop --impure --command bash -x ./scripts/run_prometheus.sh
50-
env:
51-
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
52-
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
53-
- name: Start promtail
54-
if: (inputs.prometheus_username != '')
55-
shell: bash
56-
# Assumes calling project has a nix flake that ensures a compatible promtail
57-
run: nix develop --impure --command bash -x ./scripts/run_promtail.sh
58-
env:
59-
LOKI_USERNAME: ${{ inputs.loki_username }}
60-
LOKI_PASSWORD: ${{ inputs.loki_password }}
6144
- name: Notify of metrics availability
6245
if: (inputs.prometheus_username != '')
6346
shell: bash
@@ -68,9 +51,14 @@ runs:
6851
FILTER_BY_OWNER: ${{ inputs.filter_by_owner }}
6952
- name: Run command
7053
shell: bash
71-
run: ${{ inputs.run_env }} ${{ inputs.run }}
54+
# --impure ensures the env vars are accessible to the command
55+
run: ${{ inputs.run_env }} nix develop --impure --command bash -x ${{ inputs.run }}
7256
env:
73-
TMPNET_DELAY_NETWORK_SHUTDOWN: true # Ensure shutdown waits for a final metrics scrape
57+
TMPNET_ENABLE_COLLECTORS: true
58+
LOKI_USERNAME: ${{ inputs.loki_username }}
59+
LOKI_PASSWORD: ${{ inputs.loki_password }}
60+
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
61+
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
7462
GH_REPO: ${{ inputs.repository_owner }}/${{ inputs.repository_name }}
7563
GH_WORKFLOW: ${{ inputs.workflow }}
7664
GH_RUN_ID: ${{ inputs.run_id }}

scripts/run_prometheus.sh

Lines changed: 0 additions & 93 deletions
This file was deleted.

scripts/run_promtail.sh

Lines changed: 0 additions & 91 deletions
This file was deleted.

tests/fixture/e2e/env.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
130130
}
131131
}
132132

133+
if flagVars.EnableCollectors() {
134+
require.NoError(tmpnet.EnsureCollectorsRunning(tc.Log()))
135+
}
136+
133137
// Start a new network
134138
if network == nil {
135139
network = desiredNetwork

tests/fixture/e2e/flags.go

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,16 @@ import (
1515
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
1616
)
1717

18-
const (
19-
// Ensure that this value takes into account the scrape_interval
20-
// defined in scripts/run_prometheus.sh.
21-
networkShutdownDelay = 12 * time.Second
22-
23-
delayNetworkShutdownEnvName = "TMPNET_DELAY_NETWORK_SHUTDOWN"
24-
)
25-
2618
type FlagVars struct {
27-
avalancheGoExecPath string
28-
pluginDir string
29-
networkDir string
30-
reuseNetwork bool
31-
delayNetworkShutdown bool
32-
startNetwork bool
33-
stopNetwork bool
34-
restartNetwork bool
35-
nodeCount int
19+
avalancheGoExecPath string
20+
pluginDir string
21+
networkDir string
22+
reuseNetwork bool
23+
enableCollectors bool
24+
startNetwork bool
25+
stopNetwork bool
26+
restartNetwork bool
27+
nodeCount int
3628
}
3729

3830
func (v *FlagVars) AvalancheGoExecPath() (string, error) {
@@ -80,10 +72,14 @@ func (v *FlagVars) RestartNetwork() bool {
8072
return v.restartNetwork
8173
}
8274

75+
func (v *FlagVars) EnableCollectors() bool {
76+
return v.enableCollectors
77+
}
78+
8379
func (v *FlagVars) NetworkShutdownDelay() time.Duration {
84-
if v.delayNetworkShutdown {
80+
if v.enableCollectors {
8581
// Only return a non-zero value if the delay is enabled.
86-
return networkShutdownDelay
82+
return tmpnet.NetworkShutdownDelay
8783
}
8884
return 0
8985
}
@@ -147,10 +143,10 @@ func RegisterFlags() *FlagVars {
147143
"[optional] restart an existing network previously started with --reuse-network. Useful for ensuring a network is running with the current state of binaries on disk. Ignored if a network is not already running or --stop-network is provided.",
148144
)
149145
flag.BoolVar(
150-
&vars.delayNetworkShutdown,
151-
"delay-network-shutdown",
152-
cast.ToBool(GetEnvWithDefault(delayNetworkShutdownEnvName, "false")),
153-
"[optional] whether to delay network shutdown to allow a final metrics scrape.",
146+
&vars.enableCollectors,
147+
"enable-collectors",
148+
cast.ToBool(GetEnvWithDefault("TMPNET_ENABLE_COLLECTORS", "false")),
149+
"[optional] whether to enable collectors of logs and metrics from nodes of the temporary network.",
154150
)
155151
flag.BoolVar(
156152
&vars.startNetwork,

tests/fixture/e2e/metrics_link.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ var _ = ginkgo.AfterEach(func() {
4949
// Extend the end time by the shutdown delay (a proxy for the metrics
5050
// scrape interval) to maximize the chances of the specified duration
5151
// including all metrics relevant to the current spec.
52-
endTime := time.Now().Add(networkShutdownDelay).UnixMilli()
52+
endTime := time.Now().Add(tmpnet.NetworkShutdownDelay).UnixMilli()
5353
metricsLink := tmpnet.MetricsLinkForNetwork(
5454
env.GetNetwork().UUID,
5555
strconv.FormatInt(startTime, 10),

tests/fixture/tmpnet/node_process.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,12 @@ func (p *NodeProcess) getProcess() (*os.Process, error) {
226226
return nil, nil
227227
}
228228

229-
proc, err := os.FindProcess(p.pid)
229+
return getProcess(p.pid)
230+
}
231+
232+
// getProcess retrieves the process if it is running.
233+
func getProcess(pid int) (*os.Process, error) {
234+
proc, err := os.FindProcess(pid)
230235
if err != nil {
231236
return nil, fmt.Errorf("failed to find process: %w", err)
232237
}

0 commit comments

Comments
 (0)