Skip to content

Commit 01300b7

Browse files
committed
fixup: Refactor for review
1 parent 495eb47 commit 01300b7

File tree

10 files changed

+573
-295
lines changed

10 files changed

+573
-295
lines changed

bin/tmpnetctl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/usr/bin/env bash
2+
3+
set -euo pipefail
4+
5+
# Ensure the go command is run from the root of the repository
6+
AVALANCHE_PATH=$(cd "$( dirname "${BASH_SOURCE[0]}" )"; cd .. && pwd )
7+
cd "${AVALANCHE_PATH}"
8+
9+
# Build if needed
10+
if [[ ! -f ./build/tmpnetctl ]]; then
11+
./scripts/build_tmpnetctl.sh
12+
fi
13+
# Forward all command-line arguments so that subcommands and flags
# (e.g. `start-collectors`) reach the tmpnetctl binary.
./build/tmpnetctl "$@"

tests/e2e/README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,32 @@ these bootstrap checks during development, set the
107107
```bash
108108
E2E_SKIP_BOOTSTRAP_CHECKS=1 ./bin/ginkgo -v ./tests/e2e ...
109109
```
110+
111+
## Monitoring
112+
113+
It is possible to enable collection of logs and metrics from the
114+
temporary networks used for e2e testing by:
115+
116+
- Supplying `--enable-collectors` as an argument to the test suite
117+
- Starting collectors in advance of a test run with `tmpnetctl
118+
start-collectors`
119+
120+
Both methods require:
121+
122+
- Auth credentials to be supplied as env vars:
123+
- `PROMETHEUS_USERNAME`
124+
- `PROMETHEUS_PASSWORD`
125+
- `LOKI_USERNAME`
126+
- `LOKI_PASSWORD`
127+
- The availability in the path of binaries for promtail and prometheus
128+
- Starting a development shell with `nix develop` is one way to
129+
ensure this and requires the [installation of
130+
nix](https://github.com/DeterminateSystems/nix-installer?tab=readme-ov-file#install-nix).
131+
132+
Once started, the collectors will continue to run in the background
133+
until stopped by `tmpnetctl stop-collectors`.
134+
135+
The results of collection will be viewable at
136+
https://grafana-poc.avax-dev.network.
137+
138+
For more detail, see the [tmpnet docs](../tmpnet/README.md#monitoring).

tests/fixture/e2e/env.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
131131
}
132132

133133
if flagVars.EnableCollectors() {
134-
require.NoError(tmpnet.EnsureCollectorsRunning(tc.Log()))
134+
require.NoError(tmpnet.EnsureCollectorsRunning(tc.DefaultContext(), tc.Log()))
135135
}
136136

137137
// Start a new network

tests/fixture/e2e/flags.go

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,6 @@ func (v *FlagVars) NodeCount() int {
9696
return v.nodeCount
9797
}
9898

99-
func GetEnvWithDefault(envVar, defaultVal string) string {
100-
val := os.Getenv(envVar)
101-
if len(val) == 0 {
102-
return defaultVal
103-
}
104-
return val
105-
}
106-
10799
func RegisterFlags() *FlagVars {
108100
vars := FlagVars{}
109101
flag.StringVar(
@@ -118,7 +110,7 @@ func RegisterFlags() *FlagVars {
118110
flag.StringVar(
119111
&vars.pluginDir,
120112
"plugin-dir",
121-
GetEnvWithDefault(tmpnet.AvalancheGoPluginDirEnvName, os.ExpandEnv("$HOME/.avalanchego/plugins")),
113+
tmpnet.GetEnvWithDefault(tmpnet.AvalancheGoPluginDirEnvName, os.ExpandEnv("$HOME/.avalanchego/plugins")),
122114
fmt.Sprintf(
123115
"[optional] the dir containing VM plugins. Also possible to configure via the %s env variable.",
124116
tmpnet.AvalancheGoPluginDirEnvName,
@@ -142,12 +134,7 @@ func RegisterFlags() *FlagVars {
142134
false,
143135
"[optional] restart an existing network previously started with --reuse-network. Useful for ensuring a network is running with the current state of binaries on disk. Ignored if a network is not already running or --stop-network is provided.",
144136
)
145-
flag.BoolVar(
146-
&vars.enableCollectors,
147-
"enable-collectors",
148-
cast.ToBool(GetEnvWithDefault("TMPNET_ENABLE_COLLECTORS", "false")),
149-
"[optional] whether to enable collectors of logs and metrics from nodes of the temporary network.",
150-
)
137+
SetEnableCollectorsFlag(&vars.enableCollectors)
151138
flag.BoolVar(
152139
&vars.startNetwork,
153140
"start-network",
@@ -169,3 +156,13 @@ func RegisterFlags() *FlagVars {
169156

170157
return &vars
171158
}
159+
160+
// SetEnableCollectorsFlag registers the enable-collectors flag, storing its value in p. Exported to enable reuse by the upgrade job.
161+
func SetEnableCollectorsFlag(p *bool) {
162+
flag.BoolVar(
163+
p,
164+
"enable-collectors",
165+
cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_ENABLE_COLLECTORS", "false")),
166+
"[optional] whether to enable collectors of logs and metrics from nodes of the temporary network.",
167+
)
168+
}

tests/fixture/tmpnet/README.md

Lines changed: 53 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,24 @@ repositories.
2424
The functionality in this package is grouped by logical purpose into
2525
the following non-test files:
2626

27-
| Filename | Types | Purpose |
28-
|:------------------|:------------|:-----------------------------------------------|
29-
| defaults.go | | Defines common default configuration |
30-
| flags.go | FlagsMap | Simplifies configuration of avalanchego flags |
31-
| genesis.go | | Creates test genesis |
32-
| network.go | Network | Orchestrates and configures temporary networks |
33-
| network_config.go | Network | Reads and writes network configuration |
34-
| node.go | Node | Orchestrates and configures nodes |
35-
| node_config.go | Node | Reads and writes node configuration |
36-
| node_process.go | NodeProcess | Orchestrates node processes |
37-
| subnet.go | Subnet | Orchestrates subnets |
38-
| utils.go | | Defines shared utility functions |
27+
| Filename | Types | Purpose |
28+
|:----------------------------|:------------|:----------------------------------------------------|
29+
| collectors.go | | Starts and stops collectors for logs and metrics |
30+
| defaults.go | | Defines common default configuration |
31+
| detached_process_default.go | | Configures detached processes for darwin and linux |
32+
| detached_process_windows.go | | No-op detached process configuration for windows |
33+
| flags.go | FlagsMap | Simplifies configuration of avalanchego flags |
34+
| genesis.go | | Creates test genesis |
35+
| kube.go | | Library for Kubernetes interaction |
36+
| local_network.go | | Defines configuration for the default local network |
37+
| network.go | Network | Orchestrates and configures temporary networks |
38+
| network_config.go | Network | Reads and writes network configuration |
39+
| network_test.go | | Simple test round-tripping Network serialization |
40+
| node.go | Node | Orchestrates and configures nodes |
41+
| node_config.go | Node | Reads and writes node configuration |
42+
| node_process.go | NodeProcess | Orchestrates node processes |
43+
| subnet.go | Subnet | Orchestrates subnets |
44+
| utils.go | | Defines shared utility functions |
3945

4046
## Usage
4147

@@ -280,35 +286,54 @@ shared.
280286
### Example usage
281287

282288
```bash
283-
# Start prometheus to collect metrics
284-
PROMETHEUS_USERNAME=<username> PROMETHEUS_PASSWORD=<password> ./scripts/run_prometheus.sh
289+
# Start a nix shell to ensure the availability of promtail and prometheus.
290+
nix develop
285291

286-
# Start promtail to collect logs
287-
LOKI_USERNAME=<username> LOKI_PASSWORD=<password> ./scripts/run_promtail.sh
292+
# Enable collection of logs and metrics
293+
PROMETHEUS_USERNAME=<username> \
294+
PROMETHEUS_PASSWORD=<password> \
295+
LOKI_USERNAME=<username> \
296+
LOKI_PASSWORD=<password> \
297+
./bin/tmpnetctl start-collectors
288298

289299
# Network start emits link to grafana displaying collected logs and metrics
290300
./bin/tmpnetctl start-network
291301

292-
# Configure metrics collection from a local node binding to the default API
293-
# port of 9650 and storing its logs in ~/.avalanchego/logs. The script will
294-
# also emit a link to grafana.
295-
./scripts/configure-local-metrics-collection.sh
302+
# When done with the network, stop the collectors
303+
./bin/tmpnetctl stop-collectors
296304
```
297305

306+
### Starting collectors
307+
308+
Collectors for logs and metrics can be started by `tmpnetctl
309+
start-collectors`:
310+
311+
- Requires that the following env vars be set
312+
- `PROMETHEUS_USERNAME`
313+
- `PROMETHEUS_PASSWORD`
314+
- `LOKI_USERNAME`
315+
- `LOKI_PASSWORD`
316+
- Requires that binaries for promtail and prometheus be available in the path
317+
- Starting a development shell with `nix develop` is one way to
318+
ensure this and requires the [installation of
319+
nix](https://github.com/DeterminateSystems/nix-installer?tab=readme-ov-file#install-nix).
320+
- Starts prometheus in agent mode configured to scrape metrics from
321+
configured nodes and forward them to
322+
https://prometheus-poc.avax-dev.network.
323+
- Starts promtail configured to collect logs from configured nodes
324+
and forward them to https://loki-poc.avax-dev.network.
325+
326+
### Stopping collectors
327+
328+
Collectors for logs and metrics can be stopped by `tmpnetctl
329+
stop-collectors`.
330+
298331
### Metrics collection
299332

300333
When a node is started, configuration enabling collection of metrics
301334
from the node is written to
302335
`~/.tmpnet/prometheus/file_sd_configs/[network uuid]-[node id].json`.
303336

304-
The `scripts/run_prometheus.sh` script starts prometheus in agent mode
305-
configured to scrape metrics from configured nodes and forward the
306-
metrics to a persistent prometheus instance. The script requires that
307-
the `PROMETHEUS_USERNAME` and `PROMETHEUS_PASSWORD` env vars be set. By
308-
default the prometheus instance at
309-
https://prometheus-poc.avax-dev.network will be targeted and
310-
this can be overridden via the `PROMETHEUS_URL` env var.
311-
312337
### Log collection
313338

314339
Node logs are stored at `~/.tmpnet/networks/[network id]/[node
@@ -320,13 +345,6 @@ collection of logs for the node is written to
320345
`~/.tmpnet/promtail/file_sd_configs/[network
321346
uuid]-[node id].json`.
322347

323-
The `scripts/run_promtail.sh` script starts promtail configured to
324-
collect logs from configured nodes and forward the results to loki. The
325-
script requires that the `LOKI_USERNAME` and `LOKI_PASSWORD` env vars be
326-
set. By default the loki instance at
327-
https://loki-poc.avax-dev.network will be targeted and this
328-
can be overridden via the `LOKI_URL` env var.
329-
330348
### Labels
331349

332350
The logs and metrics collected for temporary networks will have the

tests/fixture/tmpnet/cmd/main.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import (
1616
"go.uber.org/zap"
1717

1818
"github.com/ava-labs/avalanchego/tests"
19-
"github.com/ava-labs/avalanchego/tests/fixture/e2e"
2019
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
2120
"github.com/ava-labs/avalanchego/utils/logging"
2221
"github.com/ava-labs/avalanchego/version"
@@ -124,7 +123,7 @@ func main() {
124123
startNetworkCmd.PersistentFlags().StringVar(
125124
&pluginDir,
126125
"plugin-dir",
127-
e2e.GetEnvWithDefault(tmpnet.AvalancheGoPluginDirEnvName, os.ExpandEnv("$HOME/.avalanchego/plugins")),
126+
tmpnet.GetEnvWithDefault(tmpnet.AvalancheGoPluginDirEnvName, os.ExpandEnv("$HOME/.avalanchego/plugins")),
128127
"[optional] the dir containing VM plugins",
129128
)
130129
startNetworkCmd.PersistentFlags().Uint8Var(&nodeCount, "node-count", tmpnet.DefaultNodeCount, "Number of nodes the network should initially consist of")
@@ -167,6 +166,36 @@ func main() {
167166
}
168167
rootCmd.AddCommand(restartNetworkCmd)
169168

169+
startCollectorsCmd := &cobra.Command{
170+
Use: "start-collectors",
171+
Short: "Start log and metric collectors for local process-based nodes",
172+
RunE: func(*cobra.Command, []string) error {
173+
ctx, cancel := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
174+
defer cancel()
175+
log, err := tests.LoggerForFormat("", rawLogFormat)
176+
if err != nil {
177+
return err
178+
}
179+
return tmpnet.EnsureCollectorsRunning(ctx, log)
180+
},
181+
}
182+
rootCmd.AddCommand(startCollectorsCmd)
183+
184+
stopCollectorsCmd := &cobra.Command{
185+
Use: "stop-collectors",
186+
Short: "Stop log and metric collectors for local process-based nodes",
187+
RunE: func(*cobra.Command, []string) error {
188+
ctx, cancel := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
189+
defer cancel()
190+
log, err := tests.LoggerForFormat("", rawLogFormat)
191+
if err != nil {
192+
return err
193+
}
194+
return tmpnet.EnsureCollectorsStopped(ctx, log)
195+
},
196+
}
197+
rootCmd.AddCommand(stopCollectorsCmd)
198+
170199
if err := rootCmd.Execute(); err != nil {
171200
fmt.Fprintf(os.Stderr, "tmpnetctl failed: %v\n", err)
172201
os.Exit(1)

0 commit comments

Comments
 (0)