Skip to content

Commit f39055d

Browse files
authored
Add user-agents to http requests sent by Package Analysis (#1018)
* Add a simple package for setting the user-agent in request headers.
  Signed-off-by: Caleb Brown <[email protected]>
* Ensure user-agents are set when http requests are made.
  Signed-off-by: Caleb Brown <[email protected]>
* Add "production" to the user agent for the production env.
  Signed-off-by: Caleb Brown <[email protected]>
* Tweak the build command to better handle multi file main packages.
  Signed-off-by: Caleb Brown <[email protected]>
---------
Signed-off-by: Caleb Brown <[email protected]>
1 parent b443fd4 commit f39055d

File tree

10 files changed

+246
-86
lines changed

10 files changed

+246
-86
lines changed

.github/workflows/build.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ jobs:
1616
go-version: '1.21.0'
1717
- name: Install libpcap-dev
1818
run: sudo apt-get install -y libpcap-dev
19-
- run: go build -o scheduler cmd/scheduler/main.go
20-
- run: go build -o worker cmd/worker/main.go
21-
- run: go build -o analyze cmd/analyze/main.go
19+
- run: go build -o scheduler ./cmd/scheduler
20+
- run: go build -o worker ./cmd/worker
21+
- run: go build -o analyze ./cmd/analyze
2222
- run: go build -o loader load.go
2323
working-directory: function/loader
2424
- run: go build -o staticanalyze staticanalyze.go

cmd/analyze/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ COPY ./go.sum ./
88
RUN go mod download
99

1010
COPY . ./
11-
RUN go build -o analyze cmd/analyze/main.go && go build -o worker cmd/worker/main.go
11+
RUN go build -o analyze ./cmd/analyze && go build -o worker ./cmd/worker
1212

1313
FROM ubuntu:22.04@sha256:42ba2dfce475de1113d55602d40af18415897167d47c2045ec7b6d9746ff148f
1414

cmd/analyze/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"flag"
77
"fmt"
88
"log/slog"
9+
"net/http"
910
"os"
1011
"strings"
1112

@@ -19,6 +20,7 @@ import (
1920
"github.com/ossf/package-analysis/internal/resultstore"
2021
"github.com/ossf/package-analysis/internal/sandbox"
2122
"github.com/ossf/package-analysis/internal/staticanalysis"
23+
"github.com/ossf/package-analysis/internal/useragent"
2224
"github.com/ossf/package-analysis/internal/utils"
2325
"github.com/ossf/package-analysis/internal/worker"
2426
"github.com/ossf/package-analysis/pkg/api/pkgecosystem"
@@ -186,6 +188,8 @@ func run() error {
186188
analysisMode.InitFlag()
187189
flag.Parse()
188190

191+
http.DefaultTransport = useragent.DefaultRoundTripper(http.DefaultTransport, "")
192+
189193
if err := featureflags.Update(*features); err != nil {
190194
return usageError{err}
191195
}

cmd/downloader/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@ import (
55
"errors"
66
"flag"
77
"fmt"
8+
"net/http"
89
"os"
910
"strings"
1011

1112
"github.com/package-url/packageurl-go"
1213

14+
"github.com/ossf/package-analysis/internal/useragent"
1315
"github.com/ossf/package-analysis/internal/worker"
1416
)
1517

@@ -86,6 +88,8 @@ func processFileLine(text string) error {
8688
func run() error {
8789
flag.Parse()
8890

91+
http.DefaultTransport = useragent.DefaultRoundTripper(http.DefaultTransport, "")
92+
8993
if *purlFilePath == "" {
9094
return newCmdError("Please specify packages to download using -f <file>")
9195
}

cmd/worker/config.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package main
2+
3+
import (
4+
"log/slog"
5+
"os"
6+
7+
"github.com/ossf/package-analysis/internal/resultstore"
8+
"github.com/ossf/package-analysis/internal/worker"
9+
)
10+
11+
// resultBucketPaths holds bucket paths for the different types of results.
12+
type resultBucketPaths struct {
13+
analyzedPkg string
14+
dynamicAnalysis string
15+
executionLog string
16+
fileWrites string
17+
staticAnalysis string
18+
}
19+
20+
type sandboxImageSpec struct {
21+
tag string
22+
noPull bool
23+
}
24+
25+
type config struct {
26+
imageSpec sandboxImageSpec
27+
28+
resultStores *worker.ResultStores
29+
30+
subURL string
31+
packagesBucket string
32+
notificationTopicURL string
33+
34+
userAgentExtra string
35+
}
36+
37+
func (c *config) LogValue() slog.Value {
38+
return slog.GroupValue(
39+
slog.String("subscription", c.subURL),
40+
slog.String("package_bucket", c.packagesBucket),
41+
slog.String("dynamic_results_store", c.resultStores.DynamicAnalysis.String()),
42+
slog.String("static_results_store", c.resultStores.StaticAnalysis.String()),
43+
slog.String("file_write_results_store", c.resultStores.FileWrites.String()),
44+
slog.String("analyzed_packages_store", c.resultStores.AnalyzedPackage.String()),
45+
slog.String("execution_log_store", c.resultStores.ExecutionLog.String()),
46+
slog.String("image_tag", c.imageSpec.tag),
47+
slog.Bool("image_nopull", c.imageSpec.noPull),
48+
slog.String("topic_notification", c.notificationTopicURL),
49+
slog.String("user_agent_extra", c.userAgentExtra),
50+
)
51+
}
52+
53+
func resultStoreForEnv(key string) *resultstore.ResultStore {
54+
val := os.Getenv(key)
55+
if val == "" {
56+
return nil
57+
}
58+
return resultstore.New(val, resultstore.ConstructPath())
59+
}
60+
61+
func configFromEnv() *config {
62+
return &config{
63+
imageSpec: sandboxImageSpec{
64+
tag: os.Getenv("OSSF_SANDBOX_IMAGE_TAG"),
65+
noPull: os.Getenv("OSSF_SANDBOX_NOPULL") != "",
66+
},
67+
resultStores: &worker.ResultStores{
68+
AnalyzedPackage: resultStoreForEnv("OSSF_MALWARE_ANALYZED_PACKAGES"),
69+
DynamicAnalysis: resultStoreForEnv("OSSF_MALWARE_ANALYSIS_RESULTS"),
70+
ExecutionLog: resultStoreForEnv("OSSF_MALWARE_ANALYSIS_EXECUTION_LOGS"),
71+
FileWrites: resultStoreForEnv("OSSF_MALWARE_ANALYSIS_FILE_WRITE_RESULTS"),
72+
StaticAnalysis: resultStoreForEnv("OSSF_MALWARE_STATIC_ANALYSIS_RESULTS"),
73+
},
74+
subURL: os.Getenv("OSSMALWARE_WORKER_SUBSCRIPTION"),
75+
packagesBucket: os.Getenv("OSSF_MALWARE_ANALYSIS_PACKAGES"),
76+
notificationTopicURL: os.Getenv("OSSF_MALWARE_NOTIFICATION_TOPIC"),
77+
78+
userAgentExtra: os.Getenv("OSSF_MALWARE_USER_AGENT_EXTRA"),
79+
}
80+
}

cmd/worker/main.go

Lines changed: 27 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ import (
2424
"github.com/ossf/package-analysis/internal/log"
2525
"github.com/ossf/package-analysis/internal/notification"
2626
"github.com/ossf/package-analysis/internal/pkgmanager"
27-
"github.com/ossf/package-analysis/internal/resultstore"
2827
"github.com/ossf/package-analysis/internal/sandbox"
2928
"github.com/ossf/package-analysis/internal/staticanalysis"
29+
"github.com/ossf/package-analysis/internal/useragent"
3030
"github.com/ossf/package-analysis/internal/worker"
3131
"github.com/ossf/package-analysis/pkg/api/pkgecosystem"
3232
)
@@ -35,20 +35,6 @@ const (
3535
localPkgPathFmt = "/local/%s"
3636
)
3737

38-
// resultBucketPaths holds bucket paths for the different types of results.
39-
type resultBucketPaths struct {
40-
analyzedPkg string
41-
dynamicAnalysis string
42-
executionLog string
43-
fileWrites string
44-
staticAnalysis string
45-
}
46-
47-
type sandboxImageSpec struct {
48-
tag string
49-
noPull bool
50-
}
51-
5238
func copyPackageToLocalFile(ctx context.Context, packagesBucket *blob.Bucket, bucketPath string) (string, *os.File, error) {
5339
if packagesBucket == nil {
5440
return "", nil, errors.New("packages bucket not set")
@@ -77,29 +63,7 @@ func copyPackageToLocalFile(ctx context.Context, packagesBucket *blob.Bucket, bu
7763
return fmt.Sprintf(localPkgPathFmt, path.Base(bucketPath)), f, nil
7864
}
7965

80-
func makeResultStores(dest resultBucketPaths) worker.ResultStores {
81-
resultStores := worker.ResultStores{}
82-
83-
if dest.analyzedPkg != "" {
84-
resultStores.AnalyzedPackage = resultstore.New(dest.analyzedPkg, resultstore.ConstructPath())
85-
}
86-
if dest.dynamicAnalysis != "" {
87-
resultStores.DynamicAnalysis = resultstore.New(dest.dynamicAnalysis, resultstore.ConstructPath())
88-
}
89-
if dest.executionLog != "" {
90-
resultStores.ExecutionLog = resultstore.New(dest.executionLog, resultstore.ConstructPath())
91-
}
92-
if dest.fileWrites != "" {
93-
resultStores.FileWrites = resultstore.New(dest.fileWrites, resultstore.ConstructPath())
94-
}
95-
if dest.staticAnalysis != "" {
96-
resultStores.StaticAnalysis = resultstore.New(dest.staticAnalysis, resultstore.ConstructPath())
97-
}
98-
99-
return resultStores
100-
}
101-
102-
func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blob.Bucket, resultStores *worker.ResultStores, imageSpec sandboxImageSpec, notificationTopic *pubsub.Topic) error {
66+
func handleMessage(ctx context.Context, msg *pubsub.Message, cfg *config, packagesBucket *blob.Bucket, notificationTopic *pubsub.Topic) error {
10367
name := msg.Metadata["name"]
10468
if name == "" {
10569
slog.WarnContext(ctx, "name is empty")
@@ -132,7 +96,7 @@ func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blo
13296
)
13397

13498
localPkgPath := ""
135-
sandboxOpts := []sandbox.Option{sandbox.Tag(imageSpec.tag)}
99+
sandboxOpts := []sandbox.Option{sandbox.Tag(cfg.imageSpec.tag)}
136100

137101
if remotePkgPath != "" {
138102
tmpPkgPath, pkgFile, err := copyPackageToLocalFile(ctx, packagesBucket, remotePkgPath)
@@ -146,7 +110,7 @@ func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blo
146110
sandboxOpts = append(sandboxOpts, sandbox.Volume(pkgFile.Name(), localPkgPath))
147111
}
148112

149-
if imageSpec.noPull {
113+
if cfg.imageSpec.noPull {
150114
sandboxOpts = append(sandboxOpts, sandbox.NoPull())
151115
}
152116

@@ -159,19 +123,24 @@ func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blo
159123
staticSandboxOpts := append(worker.StaticSandboxOptions(), sandboxOpts...)
160124
dynamicSandboxOpts := append(worker.DynamicSandboxOptions(), sandboxOpts...)
161125

126+
// propagate user agent extras to the static analysis sandbox if it is set.
127+
if cfg.userAgentExtra != "" {
128+
staticSandboxOpts = append(staticSandboxOpts, sandbox.SetEnv("OSSF_MALWARE_USER_AGENT_EXTRA", cfg.userAgentExtra))
129+
}
130+
162131
// run both dynamic and static analysis regardless of error status of either
163132
// and return combined error(s) afterwards, if applicable
164133
staticResults, _, staticAnalysisErr := worker.RunStaticAnalysis(ctx, pkg, staticSandboxOpts, staticanalysis.All)
165134
if staticAnalysisErr == nil {
166-
staticAnalysisErr = worker.SaveStaticAnalysisData(ctx, pkg, resultStores, staticResults)
135+
staticAnalysisErr = worker.SaveStaticAnalysisData(ctx, pkg, cfg.resultStores, staticResults)
167136
}
168137

169138
result, dynamicAnalysisErr := worker.RunDynamicAnalysis(ctx, pkg, dynamicSandboxOpts, "")
170139
if dynamicAnalysisErr == nil {
171-
dynamicAnalysisErr = worker.SaveDynamicAnalysisData(ctx, pkg, resultStores, result.Data)
140+
dynamicAnalysisErr = worker.SaveDynamicAnalysisData(ctx, pkg, cfg.resultStores, result.Data)
172141
}
173142

174-
resultStores.AnalyzedPackageSaved = false
143+
cfg.resultStores.AnalyzedPackageSaved = false
175144

176145
// combine errors
177146
if analysisErr := errors.Join(dynamicAnalysisErr, staticAnalysisErr); analysisErr != nil {
@@ -187,12 +156,12 @@ func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blo
187156
return nil
188157
}
189158

190-
func messageLoop(ctx context.Context, subURL, packagesBucket, notificationTopicURL string, imageSpec sandboxImageSpec, resultsBuckets *worker.ResultStores) error {
191-
sub, err := pubsub.OpenSubscription(ctx, subURL)
159+
func messageLoop(ctx context.Context, cfg *config) error {
160+
sub, err := pubsub.OpenSubscription(ctx, cfg.subURL)
192161
if err != nil {
193162
return err
194163
}
195-
extender, err := pubsubextender.New(ctx, subURL, sub)
164+
extender, err := pubsubextender.New(ctx, cfg.subURL, sub)
196165
if err != nil {
197166
return err
198167
}
@@ -205,18 +174,18 @@ func messageLoop(ctx context.Context, subURL, packagesBucket, notificationTopicU
205174
// we pass in a nil notificationTopic object to handleMessage
206175
// and continue with the analysis with no notifications published
207176
var notificationTopic *pubsub.Topic
208-
if notificationTopicURL != "" {
209-
notificationTopic, err = pubsub.OpenTopic(ctx, notificationTopicURL)
177+
if cfg.notificationTopicURL != "" {
178+
notificationTopic, err = pubsub.OpenTopic(ctx, cfg.notificationTopicURL)
210179
if err != nil {
211180
return err
212181
}
213182
defer notificationTopic.Shutdown(ctx)
214183
}
215184

216185
var pkgsBkt *blob.Bucket
217-
if packagesBucket != "" {
186+
if cfg.packagesBucket != "" {
218187
var err error
219-
pkgsBkt, err = blob.OpenBucket(ctx, packagesBucket)
188+
pkgsBkt, err = blob.OpenBucket(ctx, cfg.packagesBucket)
220189
if err != nil {
221190
return err
222191
}
@@ -246,7 +215,7 @@ func messageLoop(ctx context.Context, subURL, packagesBucket, notificationTopicU
246215
return fmt.Errorf("error starting message ack deadline extender: %w", err)
247216
}
248217

249-
if err := handleMessage(msgCtx, msg, pkgsBkt, resultsBuckets, imageSpec, notificationTopic); err != nil {
218+
if err := handleMessage(msgCtx, msg, cfg, pkgsBkt, notificationTopic); err != nil {
250219
slog.ErrorContext(msgCtx, "Failed to process message", "error", err)
251220
if err := me.Stop(); err != nil {
252221
slog.ErrorContext(msgCtx, "Extender failed", "error", err)
@@ -267,35 +236,21 @@ func main() {
267236
log.Initialize(os.Getenv("LOGGER_ENV"))
268237

269238
ctx := context.Background()
270-
subURL := os.Getenv("OSSMALWARE_WORKER_SUBSCRIPTION")
271-
packagesBucket := os.Getenv("OSSF_MALWARE_ANALYSIS_PACKAGES")
272-
notificationTopicURL := os.Getenv("OSSF_MALWARE_NOTIFICATION_TOPIC")
273-
enableProfiler := os.Getenv("OSSF_MALWARE_ANALYSIS_ENABLE_PROFILER")
239+
240+
cfg := configFromEnv()
241+
242+
http.DefaultTransport = useragent.DefaultRoundTripper(http.DefaultTransport, cfg.userAgentExtra)
274243

275244
if err := featureflags.Update(os.Getenv("OSSF_MALWARE_FEATURE_FLAGS")); err != nil {
276245
slog.Error("Failed to parse feature flags", "error", err)
277246
os.Exit(1)
278247
}
279248

280-
resultsBuckets := resultBucketPaths{
281-
analyzedPkg: os.Getenv("OSSF_MALWARE_ANALYZED_PACKAGES"),
282-
dynamicAnalysis: os.Getenv("OSSF_MALWARE_ANALYSIS_RESULTS"),
283-
executionLog: os.Getenv("OSSF_MALWARE_ANALYSIS_EXECUTION_LOGS"),
284-
fileWrites: os.Getenv("OSSF_MALWARE_ANALYSIS_FILE_WRITE_RESULTS"),
285-
staticAnalysis: os.Getenv("OSSF_MALWARE_STATIC_ANALYSIS_RESULTS"),
286-
}
287-
resultStores := makeResultStores(resultsBuckets)
288-
289-
imageSpec := sandboxImageSpec{
290-
tag: os.Getenv("OSSF_SANDBOX_IMAGE_TAG"),
291-
noPull: os.Getenv("OSSF_SANDBOX_NOPULL") != "",
292-
}
293-
294249
sandbox.InitNetwork(ctx)
295250

296251
// If configured, start a webserver so that Go's pprof can be accessed for
297252
// debugging and profiling.
298-
if enableProfiler != "" {
253+
if os.Getenv("OSSF_MALWARE_ANALYSIS_ENABLE_PROFILER") != "" {
299254
go func() {
300255
slog.Info("Starting profiler")
301256
http.ListenAndServe(":6060", nil)
@@ -304,20 +259,11 @@ func main() {
304259

305260
// Log the configuration of the worker at startup so we can observe it.
306261
slog.InfoContext(ctx, "Starting worker",
307-
"subscription", subURL,
308-
"package_bucket", packagesBucket,
309-
"results_bucket", resultsBuckets.dynamicAnalysis,
310-
"static_results_bucket", resultsBuckets.staticAnalysis,
311-
"file_write_results_bucket", resultsBuckets.fileWrites,
312-
"analyzed_packages_bucket", resultsBuckets.analyzedPkg,
313-
"execution_log_bucket", resultsBuckets.executionLog,
314-
"image_tag", imageSpec.tag,
315-
"image_nopull", imageSpec.noPull,
316-
"topic_notification", notificationTopicURL,
262+
"config", cfg,
317263
"feature_flags", featureflags.State(),
318264
)
319265

320-
err := messageLoop(ctx, subURL, packagesBucket, notificationTopicURL, imageSpec, &resultStores)
266+
err := messageLoop(ctx, cfg)
321267
if err != nil {
322268
slog.ErrorContext(ctx, "Error encountered", "error", err)
323269
}

infra/worker/workers-set.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ spec:
3939
value: gs://ossf-malware-analysis-packages
4040
- name: OSSF_MALWARE_NOTIFICATION_TOPIC
4141
value: gcppubsub://projects/ossf-malware-analysis/topics/analysis-notify
42+
- name: OSSF_MALWARE_USER_AGENT_EXTRA
43+
value: "production"
4244
- name: OSSF_MALWARE_FEATURE_FLAGS
4345
value: "CodeExecution"
4446
securityContext:

0 commit comments

Comments
 (0)