Skip to content

Commit 3c19abb

Browse files
craig[bot]DarrylWong
andcommitted
Merge #140150
140150: roachprod: add ip address expander func r=srosenberg,herkolategan a=DarrylWong This change adds ip to the list of supported roachprod expander funcs. e.g. `{ip:1-3}` now expands to the private ip addresses of nodes 1 to 3. `:public` can be appended, e.g. `{ip:1-3:public}` to specify the public ip addresses. Additionally, this change also logs expanded commands for roachtests if they differ from the original command. This should ease debugging as we can know what command was actually run on the node. Release note: none Epic: none Fixes: none ---- Additional context is that this PR is motivated by my playing around with iptables on a roachprod cluster. Found running `roachprod ip` repeatedly and copy pasting tedious. Co-authored-by: DarrylWong <[email protected]>
2 parents 0b98fd7 + 3b1a0b0 commit 3c19abb

File tree

9 files changed

+374
-26
lines changed

9 files changed

+374
-26
lines changed

pkg/cmd/roachtest/cluster.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2508,7 +2508,7 @@ func (c *clusterImpl) RunE(ctx context.Context, options install.RunOptions, args
25082508
}
25092509
if err := roachprod.Run(
25102510
ctx, l, c.MakeNodes(nodes), "", "", c.IsSecure(),
2511-
l.Stdout, l.Stderr, args, options.WithExpanderConfig(expanderCfg),
2511+
l.Stdout, l.Stderr, args, options.WithExpanderConfig(expanderCfg).WithLogExpandedCommand(),
25122512
); err != nil {
25132513
if err := ctx.Err(); err != nil {
25142514
l.Printf("(note: incoming context was canceled: %s)", err)
@@ -2578,7 +2578,7 @@ func (c *clusterImpl) RunWithDetails(
25782578
}
25792579
results, err := roachprod.RunWithDetails(
25802580
ctx, l, c.MakeNodes(nodes), "" /* SSHOptions */, "", /* processTag */
2581-
c.IsSecure(), args, options.WithExpanderConfig(expanderCfg),
2581+
c.IsSecure(), args, options.WithExpanderConfig(expanderCfg).WithLogExpandedCommand(),
25822582
)
25832583

25842584
var logFileFull string

pkg/cmd/roachtest/tests/failover.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,11 +1341,9 @@ func (f *blackholeFailer) Fail(ctx context.Context, nodeID int) {
13411341
// FailPartial creates a partial blackhole failure between the given node and
13421342
// peers.
13431343
func (f *blackholeFailer) FailPartial(ctx context.Context, nodeID int, peerIDs []int) {
1344-
peerIPs, err := f.c.InternalIP(ctx, f.t.L(), peerIDs)
1345-
require.NoError(f.t, err)
1346-
1347-
for _, peerIP := range peerIPs {
1344+
for _, peerID := range peerIDs {
13481345
pgport := fmt.Sprintf("{pgport:%d}", nodeID)
1346+
peerIP := fmt.Sprintf("{ip:%d}", peerID)
13491347

13501348
// When dropping both input and output, make sure we drop packets in both
13511349
// directions for both the inbound and outbound TCP connections, such that

pkg/cmd/roachtest/tests/jepsen.go

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,8 @@ func initJepsen(ctx context.Context, t test.Test, c cluster.Cluster, j jepsenCon
189189
c.Run(ctx, option.WithNodes(workers), "sh", "-c", `"cat controller_id_rsa.pub >> .ssh/authorized_keys"`)
190190
// Prime the known hosts file, and use the unhashed format to
191191
// work around JSCH auth error: https://github.com/jepsen-io/jepsen/blob/master/README.md
192-
ips, err := c.InternalIP(ctx, t.L(), workers)
193-
if err != nil {
194-
t.Fatal(err)
195-
}
196-
for _, ip := range ips {
197-
c.Run(ctx, option.WithNodes(controller), "sh", "-c", fmt.Sprintf(`"ssh-keyscan -t rsa %s >> .ssh/known_hosts"`, ip))
192+
for _, worker := range workers {
193+
c.Run(ctx, option.WithNodes(controller), "sh", "-c", fmt.Sprintf(`"ssh-keyscan -t rsa {ip:%d} >> .ssh/known_hosts"`, worker))
198194
}
199195

200196
t.L().Printf("cluster initialization complete\n")
@@ -313,12 +309,8 @@ func runJepsen(ctx context.Context, t test.Test, c cluster.Cluster, testName, ne
313309

314310
// Get the IP addresses for all our workers.
315311
var nodeFlags []string
316-
ips, err := c.InternalIP(ctx, t.L(), c.Range(1, c.Spec().NodeCount-1))
317-
if err != nil {
318-
t.Fatal(err)
319-
}
320-
for _, ip := range ips {
321-
nodeFlags = append(nodeFlags, "-n "+ip)
312+
for _, node := range c.Range(1, c.Spec().NodeCount-1) {
313+
nodeFlags = append(nodeFlags, fmt.Sprintf("-n {ip:%d}", node))
322314
}
323315
nodesStr := strings.Join(nodeFlags, " ")
324316

pkg/roachprod/install/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ go_test(
5959
srcs = [
6060
"cluster_synced_test.go",
6161
"cockroach_test.go",
62+
"expander_test.go",
6263
"monitor_test.go",
6364
"services_test.go",
6465
"session_test.go",

pkg/roachprod/install/cluster_synced.go

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -782,10 +782,12 @@ func (r *RunResultDetails) Output(decorate bool) string {
782782
type RunCmdOptions struct {
783783
combinedOut bool
784784
includeRoachprodEnvVars bool
785-
stdin io.Reader
786-
stdout, stderr io.Writer
787-
remoteOptions []remoteSessionOption
788-
expanderConfig ExpanderConfig
785+
// If true, logs the expanded result if it differs from the original command.
786+
logExpandedCmd bool
787+
stdin io.Reader
788+
stdout, stderr io.Writer
789+
remoteOptions []remoteSessionOption
790+
expanderConfig ExpanderConfig
789791
}
790792

791793
// Default RunCmdOptions enable combining output (stdout and stderr) and capturing ssh (verbose) debug output.
@@ -826,6 +828,9 @@ func (c *SyncedCluster) runCmdOnSingleNode(
826828
if err != nil {
827829
return &noResult, errors.WithDetailf(err, "error expanding command: %s", cmd)
828830
}
831+
if opts.logExpandedCmd && expandedCmd != cmd {
832+
l.Printf("Node %d expanded cmd: %s", e.node, expandedCmd)
833+
}
829834

830835
nodeCmd := expandedCmd
831836
// Be careful about changing these command strings. In particular, we need
@@ -931,6 +936,7 @@ func (c *SyncedCluster) Run(
931936
stdout: stdout,
932937
stderr: stderr,
933938
expanderConfig: options.ExpanderConfig,
939+
logExpandedCmd: options.LogExpandedCmd,
934940
}
935941
result, err := c.runCmdOnSingleNode(ctx, l, node, cmd, opts)
936942
return result, err
@@ -1015,6 +1021,7 @@ func (c *SyncedCluster) RunWithDetails(
10151021
stdout: l.Stdout,
10161022
stderr: l.Stderr,
10171023
expanderConfig: options.ExpanderConfig,
1024+
logExpandedCmd: options.LogExpandedCmd,
10181025
}
10191026
result, err := c.runCmdOnSingleNode(ctx, l, node, cmd, opts)
10201027
return result, err

pkg/roachprod/install/expander.go

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ var pgURLRe = regexp.MustCompile(`{pgurl(:[-,0-9]+|:(?i)lb)?(:[a-z0-9\-]+)?(:[0-
3737
var pgHostRe = regexp.MustCompile(`{pghost(:[-,0-9]+|:(?i)lb)?(:[a-z0-9\-]+)?(:[0-9]+)?}`)
3838
var pgPortRe = regexp.MustCompile(`{pgport(:[-,0-9]+)?(:[a-z0-9\-]+)?(:[0-9]+)?}`)
3939
var uiPortRe = regexp.MustCompile(`{uiport(:[-,0-9]+)}`)
40+
var ipAddressRe = regexp.MustCompile(`{ip(:\d+([-,]\d+)?)(:public|:private)?}`)
4041
var storeDirRe = regexp.MustCompile(`{store-dir(:[0-9]+)?}`)
4142
var logDirRe = regexp.MustCompile(`{log-dir(:[a-z0-9\-]+)?(:[0-9]+)?}`)
4243
var certsDirRe = regexp.MustCompile(`{certs-dir}`)
@@ -51,10 +52,12 @@ type ExpanderConfig struct {
5152
type expander struct {
5253
node Node
5354

54-
pgURLs map[string]map[Node]string
55-
pgHosts map[Node]string
56-
pgPorts map[Node]string
57-
uiPorts map[Node]string
55+
pgURLs map[string]map[Node]string
56+
pgHosts map[Node]string
57+
pgPorts map[Node]string
58+
uiPorts map[Node]string
59+
publicIPs map[Node]string
60+
privateIPs map[Node]string
5861
}
5962

6063
// expanderFunc is a function which may expand a string with a templated value.
@@ -77,6 +80,7 @@ func (e *expander) expand(
7780
e.maybeExpandStoreDir,
7881
e.maybeExpandLogDir,
7982
e.maybeExpandCertsDir,
83+
e.maybeExpandIPAddress,
8084
}
8185
for _, f := range expanders {
8286
v, expanded, fErr := f(ctx, l, c, cfg, s)
@@ -329,3 +333,40 @@ func (e *expander) maybeExpandCertsDir(
329333
}
330334
return c.CertsDir(e.node), true, nil
331335
}
336+
337+
// maybeExpandIPAddress is an expanderFunc for {ipaddress:<nodeSpec>[:public|private]}
338+
func (e *expander) maybeExpandIPAddress(
339+
ctx context.Context, l *logger.Logger, c *SyncedCluster, cfg ExpanderConfig, s string,
340+
) (string, bool, error) {
341+
m := ipAddressRe.FindStringSubmatch(s)
342+
if m == nil {
343+
return s, false, nil
344+
}
345+
346+
var err error
347+
switch m[3] {
348+
case ":public":
349+
if e.publicIPs == nil {
350+
e.publicIPs = make(map[Node]string, len(c.VMs))
351+
for _, node := range allNodes(len(c.VMs)) {
352+
e.publicIPs[node] = c.Host(node)
353+
}
354+
}
355+
356+
s, err = e.maybeExpandMap(c, e.publicIPs, m[1])
357+
default:
358+
if e.privateIPs == nil {
359+
e.privateIPs = make(map[Node]string, len(c.VMs))
360+
for _, node := range allNodes(len(c.VMs)) {
361+
ip, err := c.GetInternalIP(node)
362+
if err != nil {
363+
return "", false, err
364+
}
365+
e.privateIPs[node] = ip
366+
}
367+
}
368+
369+
s, err = e.maybeExpandMap(c, e.privateIPs, m[1])
370+
}
371+
return s, err == nil, err
372+
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package install
7+
8+
import (
9+
"context"
10+
"encoding/json"
11+
"os"
12+
"testing"
13+
14+
"github.com/cockroachdb/cockroach/pkg/roachprod/cloud"
15+
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
16+
"github.com/cockroachdb/cockroach/pkg/testutils/datapathutils"
17+
"github.com/stretchr/testify/require"
18+
)
19+
20+
func newTestCluster(t *testing.T) *SyncedCluster {
21+
testClusterFile := datapathutils.TestDataPath(t, "test_cluster.txt")
22+
data, err := os.ReadFile(testClusterFile)
23+
if err != nil {
24+
t.Fatalf("could not read test cluster file: %s", err)
25+
}
26+
metadata := &cloud.Cluster{}
27+
if err := json.Unmarshal(data, metadata); err != nil {
28+
t.Fatalf("could not unmarshal test cluster file: %s", err)
29+
}
30+
c, err := NewSyncedCluster(metadata, "all", MakeClusterSettings())
31+
if err != nil {
32+
t.Fatalf("could not create synced cluster: %s", err)
33+
}
34+
return c
35+
}
36+
37+
func TestIPExpander(t *testing.T) {
38+
ctx := context.Background()
39+
l := &logger.Logger{}
40+
41+
c := newTestCluster(t)
42+
e := &expander{
43+
node: c.Nodes[0],
44+
}
45+
cfg := ExpanderConfig{}
46+
47+
for _, tc := range []struct {
48+
name string
49+
command string
50+
expected string
51+
}{
52+
{
53+
name: "same node",
54+
command: "{ip:1}",
55+
expected: "10.142.1.1",
56+
},
57+
{
58+
name: "different node",
59+
command: "{ip:2}",
60+
expected: "10.142.1.2",
61+
},
62+
{
63+
name: "range of nodes",
64+
command: "{ip:1-3}",
65+
expected: "10.142.1.1 10.142.1.2 10.142.1.3",
66+
},
67+
{
68+
name: "non consecutive nodes",
69+
command: "{ip:2,4}",
70+
expected: "10.142.1.2 10.142.1.4",
71+
},
72+
{
73+
name: "public ip",
74+
command: "{ip:1-4:public}",
75+
expected: "35.196.120.1 35.227.25.2 34.75.95.3 34.139.54.4",
76+
},
77+
{
78+
name: "private ip",
79+
command: "{ip:1-4:private}",
80+
expected: "10.142.1.1 10.142.1.2 10.142.1.3 10.142.1.4",
81+
},
82+
} {
83+
t.Run(tc.name, func(t *testing.T) {
84+
res, err := e.expand(ctx, l, c, cfg, tc.command)
85+
require.NoError(t, err)
86+
require.Equal(t, tc.expected, res)
87+
})
88+
}
89+
}

pkg/roachprod/install/run_options.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ type RunOptions struct {
2525
// ExpanderConfig configures the behaviour of the roachprod expander
2626
// during a run.
2727
ExpanderConfig ExpanderConfig
28+
// LogExpandedCmd will log the expanded command if it differs from the original.
29+
LogExpandedCmd bool
2830

2931
// These are private to roachprod
3032
Nodes Nodes
@@ -106,3 +108,8 @@ func (r RunOptions) WithExpanderConfig(cfg ExpanderConfig) RunOptions {
106108
r.ExpanderConfig = cfg
107109
return r
108110
}
111+
112+
func (r RunOptions) WithLogExpandedCommand() RunOptions {
113+
r.LogExpandedCmd = true
114+
return r
115+
}

0 commit comments

Comments
 (0)