Skip to content

Commit

Permalink
fixup: Refactor for review
Browse files Browse the repository at this point in the history
  • Loading branch information
maru-ava committed Feb 22, 2025
1 parent 495eb47 commit 01300b7
Show file tree
Hide file tree
Showing 10 changed files with 573 additions and 295 deletions.
13 changes: 13 additions & 0 deletions bin/tmpnetctl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash

# Wrapper that runs the tmpnetctl binary from the root of the repository,
# building it first if it has not been built yet. All arguments supplied
# to this script are forwarded unchanged to the binary
# (e.g. `./bin/tmpnetctl start-collectors`).

set -euo pipefail

# Ensure the go command is run from the root of the repository
AVALANCHE_PATH=$(cd "$( dirname "${BASH_SOURCE[0]}" )"; cd .. && pwd )
cd "${AVALANCHE_PATH}"

# Build if needed
if [[ ! -f ./build/tmpnetctl ]]; then
  ./scripts/build_tmpnetctl.sh
fi

# Forward the caller's arguments; without "$@" the subcommand and its flags
# would be silently dropped and the binary would always run with no arguments.
./build/tmpnetctl "$@"
29 changes: 29 additions & 0 deletions tests/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,32 @@ these bootstrap checks during development, set the
```bash
E2E_SKIP_BOOTSTRAP_CHECKS=1 ./bin/ginkgo -v ./tests/e2e ...
```

## Monitoring

It is possible to enable collection of logs and metrics from the
temporary networks used for e2e testing by:

- Supplying `--enable-collectors` as an argument to the test suite
- Starting collectors in advance of a test run with `tmpnetctl
start-collectors`

Both methods require:

- Auth credentials to be supplied as env vars:
- `PROMETHEUS_USERNAME`
- `PROMETHEUS_PASSWORD`
- `LOKI_USERNAME`
- `LOKI_PASSWORD`
- The availability in the path of binaries for promtail and prometheus
- Starting a development shell with `nix develop` is one way to
ensure this and requires the [installation of
nix](https://github.com/DeterminateSystems/nix-installer?tab=readme-ov-file#install-nix).

Once started, the collectors will continue to run in the background
until stopped by `tmpnetctl stop-collectors`.

The results of collection will be viewable at
https://grafana-poc.avax-dev.network.

For more detail, see the [tmpnet docs](../tmpnet/README.md#monitoring).
2 changes: 1 addition & 1 deletion tests/fixture/e2e/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
}

if flagVars.EnableCollectors() {
require.NoError(tmpnet.EnsureCollectorsRunning(tc.Log()))
require.NoError(tmpnet.EnsureCollectorsRunning(tc.DefaultContext(), tc.Log()))
}

// Start a new network
Expand Down
27 changes: 12 additions & 15 deletions tests/fixture/e2e/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,6 @@ func (v *FlagVars) NodeCount() int {
return v.nodeCount
}

// GetEnvWithDefault returns the value of the environment variable named by
// envVar. When that value is empty (which os.Getenv also reports for an unset
// variable), defaultVal is returned instead.
func GetEnvWithDefault(envVar, defaultVal string) string {
	if v := os.Getenv(envVar); v != "" {
		return v
	}
	return defaultVal
}

func RegisterFlags() *FlagVars {
vars := FlagVars{}
flag.StringVar(
Expand All @@ -118,7 +110,7 @@ func RegisterFlags() *FlagVars {
flag.StringVar(
&vars.pluginDir,
"plugin-dir",
GetEnvWithDefault(tmpnet.AvalancheGoPluginDirEnvName, os.ExpandEnv("$HOME/.avalanchego/plugins")),
tmpnet.GetEnvWithDefault(tmpnet.AvalancheGoPluginDirEnvName, os.ExpandEnv("$HOME/.avalanchego/plugins")),
fmt.Sprintf(
"[optional] the dir containing VM plugins. Also possible to configure via the %s env variable.",
tmpnet.AvalancheGoPluginDirEnvName,
Expand All @@ -142,12 +134,7 @@ func RegisterFlags() *FlagVars {
false,
"[optional] restart an existing network previously started with --reuse-network. Useful for ensuring a network is running with the current state of binaries on disk. Ignored if a network is not already running or --stop-network is provided.",
)
flag.BoolVar(
&vars.enableCollectors,
"enable-collectors",
cast.ToBool(GetEnvWithDefault("TMPNET_ENABLE_COLLECTORS", "false")),
"[optional] whether to enable collectors of logs and metrics from nodes of the temporary network.",
)
SetEnableCollectorsFlag(&vars.enableCollectors)
flag.BoolVar(
&vars.startNetwork,
"start-network",
Expand All @@ -169,3 +156,13 @@ func RegisterFlags() *FlagVars {

return &vars
}

// SetEnableCollectorsFlag registers the "enable-collectors" boolean flag via
// flag.BoolVar, storing the parsed value in p. The flag's default is derived
// from the TMPNET_ENABLE_COLLECTORS env var, with "false" supplied as the
// fallback value.
//
// It is a standalone function to enable reuse by the upgrade job.
func SetEnableCollectorsFlag(p *bool) {
	const (
		flagName = "enable-collectors"
		envName  = "TMPNET_ENABLE_COLLECTORS"
		usage    = "[optional] whether to enable collectors of logs and metrics from nodes of the temporary network."
	)

	// Resolve the default from the environment before registering the flag.
	defaultVal := cast.ToBool(tmpnet.GetEnvWithDefault(envName, "false"))
	flag.BoolVar(p, flagName, defaultVal, usage)
}
88 changes: 53 additions & 35 deletions tests/fixture/tmpnet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,24 @@ repositories.
The functionality in this package is grouped by logical purpose into
the following non-test files:

| Filename | Types | Purpose |
|:------------------|:------------|:-----------------------------------------------|
| defaults.go | | Defines common default configuration |
| flags.go | FlagsMap | Simplifies configuration of avalanchego flags |
| genesis.go | | Creates test genesis |
| network.go | Network | Orchestrates and configures temporary networks |
| network_config.go | Network | Reads and writes network configuration |
| node.go | Node | Orchestrates and configures nodes |
| node_config.go | Node | Reads and writes node configuration |
| node_process.go | NodeProcess | Orchestrates node processes |
| subnet.go | Subnet | Orchestrates subnets |
| utils.go | | Defines shared utility functions |
| Filename | Types | Purpose |
|:----------------------------|:------------|:----------------------------------------------------|
| collectors.go | | Starts and stops collectors for logs and metrics |
| defaults.go | | Defines common default configuration |
| detached_process_default.go | | Configures detached processes for darwin and linux |
| detached_process_windows.go | | No-op detached process configuration for windows |
| flags.go | FlagsMap | Simplifies configuration of avalanchego flags |
| genesis.go | | Creates test genesis |
| kube.go | | Library for Kubernetes interaction |
| local_network.go | | Defines configuration for the default local network |
| network.go | Network | Orchestrates and configures temporary networks |
| network_config.go | Network | Reads and writes network configuration |
| network_test.go | | Simple test round-tripping Network serialization |
| node.go | Node | Orchestrates and configures nodes |
| node_config.go | Node | Reads and writes node configuration |
| node_process.go | NodeProcess | Orchestrates node processes |
| subnet.go | Subnet | Orchestrates subnets |
| utils.go | | Defines shared utility functions |

## Usage

Expand Down Expand Up @@ -280,35 +286,54 @@ shared.
### Example usage

```bash
# Start prometheus to collect metrics
PROMETHEUS_USERNAME=<username> PROMETHEUS_PASSWORD=<password> ./scripts/run_prometheus.sh
# Start a nix shell to ensure the availability of promtail and prometheus.
nix develop

# Start promtail to collect logs
LOKI_USERNAME=<username> LOKI_PASSWORD=<password> ./scripts/run_promtail.sh
# Enable collection of logs and metrics
PROMETHEUS_USERNAME=<username> \
PROMETHEUS_PASSWORD=<password> \
LOKI_USERNAME=<username> \
LOKI_PASSWORD=<password> \
./bin/tmpnetctl start-collectors

# Network start emits link to grafana displaying collected logs and metrics
./bin/tmpnetctl start-network

# Configure metrics collection from a local node binding to the default API
# port of 9650 and storing its logs in ~/.avalanchego/logs. The script will
# also emit a link to grafana.
./scripts/configure-local-metrics-collection.sh
# When done with the network, stop the collectors
./bin/tmpnetctl stop-collectors
```

### Starting collectors

Collectors for logs and metrics can be started by `tmpnetctl
start-collectors`:

- Requires that the following env vars be set
- `PROMETHEUS_USERNAME`
- `PROMETHEUS_PASSWORD`
- `LOKI_USERNAME`
- `LOKI_PASSWORD`
- Requires that binaries for promtail and prometheus be available in the path
- Starting a development shell with `nix develop` is one way to
ensure this and requires the [installation of
nix](https://github.com/DeterminateSystems/nix-installer?tab=readme-ov-file#install-nix).
- Starts prometheus in agent mode configured to scrape metrics from
configured nodes and forward them to
https://prometheus-poc.avax-dev.network.
- Starts promtail configured to collect logs from configured nodes
and forward them to https://loki-poc.avax-dev.network.

### Stopping collectors

Collectors for logs and metrics can be stopped by `tmpnetctl
stop-collectors`.

### Metrics collection

When a node is started, configuration enabling collection of metrics
from the node is written to
`~/.tmpnet/prometheus/file_sd_configs/[network uuid]-[node id].json`.

The `scripts/run_prometheus.sh` script starts prometheus in agent mode
configured to scrape metrics from configured nodes and forward the
metrics to a persistent prometheus instance. The script requires that
the `PROMETHEUS_USERNAME` and `PROMETHEUS_PASSWORD` env vars be set. By
default the prometheus instance at
https://prometheus-poc.avax-dev.network will be targeted and
this can be overridden via the `PROMETHEUS_URL` env var.

### Log collection

Node logs are stored at `~/.tmpnet/networks/[network id]/[node
Expand All @@ -320,13 +345,6 @@ collection of logs for the node is written to
`~/.tmpnet/promtail/file_sd_configs/[network
uuid]-[node id].json`.

The `scripts/run_promtail.sh` script starts promtail configured to
collect logs from configured nodes and forward the results to loki. The
script requires that the `LOKI_USERNAME` and `LOKI_PASSWORD` env vars be
set. By default the loki instance at
https://loki-poc.avax-dev.network will be targeted and this
can be overridden via the `LOKI_URL` env var.

### Labels

The logs and metrics collected for temporary networks will have the
Expand Down
33 changes: 31 additions & 2 deletions tests/fixture/tmpnet/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"go.uber.org/zap"

"github.com/ava-labs/avalanchego/tests"
"github.com/ava-labs/avalanchego/tests/fixture/e2e"
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
"github.com/ava-labs/avalanchego/utils/logging"
"github.com/ava-labs/avalanchego/version"
Expand Down Expand Up @@ -124,7 +123,7 @@ func main() {
startNetworkCmd.PersistentFlags().StringVar(
&pluginDir,
"plugin-dir",
e2e.GetEnvWithDefault(tmpnet.AvalancheGoPluginDirEnvName, os.ExpandEnv("$HOME/.avalanchego/plugins")),
tmpnet.GetEnvWithDefault(tmpnet.AvalancheGoPluginDirEnvName, os.ExpandEnv("$HOME/.avalanchego/plugins")),
"[optional] the dir containing VM plugins",
)
startNetworkCmd.PersistentFlags().Uint8Var(&nodeCount, "node-count", tmpnet.DefaultNodeCount, "Number of nodes the network should initially consist of")
Expand Down Expand Up @@ -167,6 +166,36 @@ func main() {
}
rootCmd.AddCommand(restartNetworkCmd)

startCollectorsCmd := &cobra.Command{
Use: "start-collectors",
Short: "Start log and metric collectors for local process-based nodes",
RunE: func(*cobra.Command, []string) error {
ctx, cancel := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
defer cancel()
log, err := tests.LoggerForFormat("", rawLogFormat)
if err != nil {
return err
}
return tmpnet.EnsureCollectorsRunning(ctx, log)
},
}
rootCmd.AddCommand(startCollectorsCmd)

stopCollectorsCmd := &cobra.Command{
Use: "stop-collectors",
Short: "Stop log and metric collectors for local process-based nodes",
RunE: func(*cobra.Command, []string) error {
ctx, cancel := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
defer cancel()
log, err := tests.LoggerForFormat("", rawLogFormat)
if err != nil {
return err
}
return tmpnet.EnsureCollectorsStopped(ctx, log)
},
}
rootCmd.AddCommand(stopCollectorsCmd)

if err := rootCmd.Execute(); err != nil {
fmt.Fprintf(os.Stderr, "tmpnetctl failed: %v\n", err)
os.Exit(1)
Expand Down
Loading

0 comments on commit 01300b7

Please sign in to comment.