Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add metrics for debuginfo #1704

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions pkg/debuginfo/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/thanos-io/objstore"
)

Expand Down Expand Up @@ -90,10 +91,28 @@ type ObjectStoreMetadata struct {
logger log.Logger

bucket objstore.Bucket

metadataUpdateDuration prometheus.Histogram
}

func NewObjectStoreMetadata(logger log.Logger, bucket objstore.Bucket) *ObjectStoreMetadata {
return &ObjectStoreMetadata{logger: log.With(logger, "component", "debuginfo-metadata"), bucket: bucket}
// NewObjectStoreMetadata returns an ObjectStoreMetadata that stores bucket,
// tags logger with component=debuginfo-metadata, and owns the
// debuginfo_metadata_update_duration_seconds histogram.
//
// The histogram is registered with reg so that it is actually exported;
// without registration the observations would never be scraped. A nil reg
// skips registration. Registration panics if reg already holds a collector
// with the same name, since this constructor has no error result.
func NewObjectStoreMetadata(
	logger log.Logger,
	reg prometheus.Registerer,
	bucket objstore.Bucket,
) *ObjectStoreMetadata {
	metadataUpdateDuration := prometheus.NewHistogram(
		prometheus.HistogramOpts{
			Name:    "debuginfo_metadata_update_duration_seconds",
			Help:    "How long it took in seconds to finish updating metadata.",
			Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120},
		},
	)
	if reg != nil {
		reg.MustRegister(metadataUpdateDuration)
	}

	return &ObjectStoreMetadata{
		logger:                 log.With(logger, "component", "debuginfo-metadata"),
		bucket:                 bucket,
		metadataUpdateDuration: metadataUpdateDuration,
	}
}

type Metadata struct {
Expand Down Expand Up @@ -170,6 +189,7 @@ func (m *ObjectStoreMetadata) MarkAsUploaded(ctx context.Context, buildID, hash
metaData.BuildID = buildID
metaData.Hash = hash
metaData.UploadFinishedAt = time.Now().Unix()
m.metadataUpdateDuration.Observe(time.Unix(metaData.UploadFinishedAt, 0).Sub(time.Unix(metaData.UploadStartedAt, 0)).Seconds())

metadataBytes, _ := json.MarshalIndent(&metaData, "", "\t")
newData := bytes.NewReader(metadataBytes)
Expand Down
3 changes: 2 additions & 1 deletion pkg/debuginfo/metadata_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ func TestMetadata(t *testing.T) {
store, err := NewStore(
tracer,
logger,
prometheus.NewRegistry(),
cacheDir,
NewObjectStoreMetadata(logger, bucket),
NewObjectStoreMetadata(logger, prometheus.NewRegistry(), bucket),
bucket,
NopDebugInfodClient{},
)
Expand Down
65 changes: 56 additions & 9 deletions pkg/debuginfo/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/nanmu42/limitio"
"github.com/prometheus/client_golang/prometheus"
"github.com/thanos-io/objstore"
"github.com/thanos-io/objstore/client"
"go.opentelemetry.io/otel/attribute"
Expand Down Expand Up @@ -79,28 +80,70 @@ type Store struct {

metadata MetadataManager
debuginfodClient DebugInfodClient

debugInfoUploadAttemptsTotal prometheus.Counter
debugInfoUploadErrorsTotal prometheus.CounterVec
debugInfoUploadDuration prometheus.Histogram
existsCheckDuration prometheus.Histogram
}

// NewStore returns a new debug info store.
//
// It creates the upload-attempt counter, the upload-error counter vector
// (labelled by "reason"), and the upload and exists-check duration histograms,
// and registers all of them with reg so that they are actually exported.
// A nil reg skips registration. If any collector cannot be registered (for
// example because one with the same name is already registered with reg), the
// error is returned and no Store is created.
func NewStore(
	tracer trace.Tracer,
	logger log.Logger,
	reg prometheus.Registerer,
	cacheDir string,
	metadata MetadataManager,
	bucket objstore.Bucket,
	debuginfodClient DebugInfodClient,
) (*Store, error) {
	debugInfoUploadAttemptsTotal := prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "debuginfo_upload_attempts_total",
			Help: "Total attempts to upload debuginfo.",
		},
	)
	debugInfoUploadErrorsTotal := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "debuginfo_upload_errors_total",
			Help: "Total number of errors in uploading debuginfo.",
		},
		[]string{"reason"},
	)
	debugInfoUploadDuration := prometheus.NewHistogram(
		prometheus.HistogramOpts{
			Name:    "debuginfo_upload_duration_seconds",
			Help:    "How long it took in seconds to upload debuginfo.",
			Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120},
		},
	)
	existsCheckDuration := prometheus.NewHistogram(
		prometheus.HistogramOpts{
			Name:    "debuginfo_exists_check_duration_seconds",
			Help:    "How long it took in seconds to check existing debuginfo.",
			Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120},
		},
	)

	// Creating a collector does not export it; it has to be registered.
	if reg != nil {
		collectors := []prometheus.Collector{
			debugInfoUploadAttemptsTotal,
			debugInfoUploadErrorsTotal,
			debugInfoUploadDuration,
			existsCheckDuration,
		}
		for _, c := range collectors {
			if err := reg.Register(c); err != nil {
				return nil, fmt.Errorf("register debuginfo metrics: %w", err)
			}
		}
	}

	return &Store{
		tracer:                       tracer,
		logger:                       log.With(logger, "component", "debuginfo"),
		bucket:                       bucket,
		cacheDir:                     cacheDir,
		metadata:                     metadata,
		debuginfodClient:             debuginfodClient,
		debugInfoUploadAttemptsTotal: debugInfoUploadAttemptsTotal,
		// The Store field is declared as a CounterVec value, so the vector is
		// dereferenced here.
		// NOTE(review): this relies on the copy sharing state with the
		// registered vector (CounterVec wraps a *MetricVec) — confirm, or
		// change the field to *prometheus.CounterVec.
		debugInfoUploadErrorsTotal: *debugInfoUploadErrorsTotal,
		debugInfoUploadDuration:    debugInfoUploadDuration,
		existsCheckDuration:        existsCheckDuration,
	}, nil
}

func (s *Store) Exists(ctx context.Context, req *debuginfopb.ExistsRequest) (*debuginfopb.ExistsResponse, error) {
defer func(begin time.Time) {
s.existsCheckDuration.Observe(time.Since(begin).Seconds())
}(time.Now())
span := trace.SpanFromContext(ctx)
span.SetAttributes(attribute.String("build_id", req.GetBuildId()))

Expand Down Expand Up @@ -140,8 +183,13 @@ func (s *Store) Exists(ctx context.Context, req *debuginfopb.ExistsRequest) (*de
}

func (s *Store) Upload(stream debuginfopb.DebugInfoService_UploadServer) error {
defer func(begin time.Time) {
s.debugInfoUploadDuration.Observe(time.Since(begin).Seconds())
}(time.Now())
s.debugInfoUploadAttemptsTotal.Inc()
req, err := stream.Recv()
if err != nil {
s.debugInfoUploadErrorsTotal.WithLabelValues("stream_receive").Inc()
msg := "failed to receive upload info"
level.Error(s.logger).Log("msg", msg, "err", err)
return status.Errorf(codes.Unknown, msg)
Expand All @@ -159,6 +207,7 @@ func (s *Store) Upload(stream debuginfopb.DebugInfoService_UploadServer) error {
span.SetAttributes(attribute.String("hash", hash))

if err := s.upload(ctx, buildID, hash, r); err != nil {
s.debugInfoUploadErrorsTotal.WithLabelValues("store_upload").Inc()
return err
}

Expand Down Expand Up @@ -223,8 +272,8 @@ func (s *Store) upload(ctx context.Context, buildID, hash string, r io.Reader) e
if err != nil {
return status.Error(codes.Internal, err.Error())
}

if err := elfutils.ValidateFile(objFile); err != nil {
s.debugInfoUploadErrorsTotal.WithLabelValues("validation").Inc()
// Failed to validate. Mark the file as corrupted, and let the client try to upload it again.
if err := s.metadata.MarkAsCorrupted(ctx, buildID); err != nil {
level.Warn(s.logger).Log("msg", "failed to update metadata as corrupted", "err", err)
Expand All @@ -244,7 +293,6 @@ func (s *Store) upload(ctx context.Context, buildID, hash string, r io.Reader) e
level.Debug(s.logger).Log("msg", "failed to check for DWARF", "err", err)
}
f.Close()

if hasDWARF {
return status.Error(codes.AlreadyExists, "debuginfo already exists")
}
Expand All @@ -253,7 +301,6 @@ func (s *Store) upload(ctx context.Context, buildID, hash string, r io.Reader) e

// At this point we know that we received a better version of the debug information file,
// so let the client upload it.

if err := s.metadata.MarkAsUploading(ctx, buildID); err != nil {
err = fmt.Errorf("failed to update metadata before uploading: %w", err)
return status.Error(codes.Internal, err.Error())
Expand Down
3 changes: 2 additions & 1 deletion pkg/debuginfo/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ func TestStore(t *testing.T) {
s, err := NewStore(
tracer,
logger,
prometheus.NewRegistry(),
cacheDir,
NewObjectStoreMetadata(logger, bucket),
NewObjectStoreMetadata(logger, prometheus.NewRegistry(), bucket),
bucket,
NopDebugInfodClient{},
)
Expand Down
3 changes: 2 additions & 1 deletion pkg/parca/parca.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,11 @@ func Run(ctx context.Context, logger log.Logger, reg *prometheus.Registry, flags
}
}

dbgInfoMetadata := debuginfo.NewObjectStoreMetadata(logger, bucket)
dbgInfoMetadata := debuginfo.NewObjectStoreMetadata(logger, reg, bucket)
dbgInfo, err := debuginfo.NewStore(
tracerProvider.Tracer("debuginfo"),
logger,
reg,
flags.DebuginfoCacheDir,
dbgInfoMetadata,
objstore.NewPrefixedBucket(bucket, "debuginfo"),
Expand Down
3 changes: 2 additions & 1 deletion pkg/symbolizer/symbolizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -453,10 +453,11 @@ func setup(t *testing.T) (*grpc.ClientConn, pb.MetastoreServiceClient, *Symboliz
bucket, err := client.NewBucket(logger, cfg, prometheus.NewRegistry(), "parca/store")
require.NoError(t, err)

metadata := debuginfo.NewObjectStoreMetadata(logger, bucket)
metadata := debuginfo.NewObjectStoreMetadata(logger, prometheus.NewRegistry(), bucket)
dbgStr, err := debuginfo.NewStore(
tracer,
logger,
prometheus.NewRegistry(),
debugInfoCacheDir,
metadata,
bucket,
Expand Down