Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add storage volume Prometheus metrics from XRootD monitoring packets #532

Merged
merged 1 commit into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 86 additions & 27 deletions metrics/xrootd_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ import (
)

type (
UserId struct {
SummaryStatType string
UserId struct {
Id uint32
}

Expand Down Expand Up @@ -162,16 +163,41 @@ type (
// Ssq XrdXrootdMonStatSSQ // OPTIONAL, not implemented here yet
}

// SummaryPathStat is one <stats> entry decoded from the <paths> element of a
// summary record (see SummaryPath.Stats). Free and Total are reported in
// kilobytes; HandleSummaryPacket multiplies them by 1024 before exporting
// them as bytes.
SummaryPathStat struct {
// Id is read from the element's "id" attribute.
Id string `xml:"id,attr"`
// Lp is used as the "ns" metric label. HandleSummaryPacket strips up to two
// double-quote characters from it first.
Lp string `xml:"lp"` // The minimally reduced logical file system path i.e. top-level namespace
Free int `xml:"free"` // Kilobytes available
Total int `xml:"tot"` // Kilobytes allocated
}

// SummaryPath maps the <paths> element of a summary record (SummaryStat.Paths,
// which is populated for OSS summary data).
SummaryPath struct {
// Idx is parsed from the element's own character data.
// NOTE(review): presumably the number of <stats> entries that follow —
// confirm against the XRootD summary monitoring format.
Idx int `xml:",chardata"`
// Stats collects every <stats> child element, one per reported path.
Stats []SummaryPathStat `xml:"stats"`
}

// SummaryCacheStore maps the <store> element of a summary record
// (SummaryStat.Store). HandleSummaryPacket exports Size as the total cache
// volume and Size - Used as the free cache volume.
// NOTE(review): the values are written to a "_bytes" metric without any unit
// conversion, so they are presumably already in bytes — confirm against the
// XRootD cache summary documentation.
SummaryCacheStore struct {
Size int `xml:"size"`
Used int `xml:"used"`
// NOTE(review): the meaning of Min and Max is not evident from this file;
// see the XRootD cache summary documentation before relying on them.
Min int `xml:"min"`
Max int `xml:"max"`
}

// SummaryCacheMemory maps the <mem> element of a summary record
// (SummaryStat.Memory).
// NOTE(review): units and exact field semantics (in particular Wq) are not
// evident from this file — confirm against the XRootD cache summary
// documentation before exporting these values.
SummaryCacheMemory struct {
Size int `xml:"size"`
Used int `xml:"used"`
Wq int `xml:"wq"`
}

SummaryStat struct {
Id string `xml:"id,attr"`
// Relevant for id="link"
// "tot" is the total connections since the start of the server
LinkConnections int `xml:"tot"`
LinkInBytes int `xml:"in"`
LinkOutBytes int `xml:"out"`
// Relevant for id="sched"
Threads int `xml:"threads"`
ThreadsIdle int `xml:"idle"`
Id SummaryStatType `xml:"id,attr"`
Total int `xml:"tot"`
In int `xml:"in"`
Out int `xml:"out"`
Threads int `xml:"threads"`
Idle int `xml:"idle"`
Paths SummaryPath `xml:"paths"` // For Oss Summary Data
Store SummaryCacheStore `xml:"store"`
Memory SummaryCacheMemory `xml:"mem"`
}

SummaryStatistics struct {
Expand All @@ -191,6 +217,14 @@ const (
isDisc
)

// Summary data types: the values of the "id" attribute of a summary <stats>
// element (SummaryStat.Id) that HandleSummaryPacket switches on. Each constant
// links to the section of the XRootD monitoring reference describing that
// record.
const (
LinkStat SummaryStatType = "link" // https://xrootd.slac.stanford.edu/doc/dev55/xrd_monitoring.htm#_Toc99653739
SchedStat SummaryStatType = "sched" // https://xrootd.slac.stanford.edu/doc/dev55/xrd_monitoring.htm#_Toc99653745
OssStat SummaryStatType = "oss" // https://xrootd.slac.stanford.edu/doc/dev55/xrd_monitoring.htm#_Toc99653741
CacheStat SummaryStatType = "cache" // https://xrootd.slac.stanford.edu/doc/dev55/xrd_monitoring.htm#_Toc99653733
)

var (
PacketsReceived = promauto.NewCounter(prometheus.CounterOpts{
Name: "xrootd_monitoring_packets_received",
Expand Down Expand Up @@ -227,6 +261,11 @@ var (
Help: "Number of bytes read into the server",
}, []string{"direction"})

// StorageVolume is a gauge of storage volume in bytes. It is labelled by
// namespace ("ns"), by which figure is reported ("type": total or free) and
// by the kind of server that reported it ("server_type": origin or cache).
StorageVolume = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "xrootd_storage_volume_bytes",
Help: "Storage volume usage on the server",
}, []string{"ns", "type", "server_type"}) // type: total/free; server_type: origin/cache

lastStats SummaryStat

// Maps the connection identifier with a user record
Expand Down Expand Up @@ -890,33 +929,53 @@ func HandleSummaryPacket(packet []byte) error {
for _, stat := range summaryStats.Stats {
switch stat.Id {

case "link":
// LinkConnections is the total connections since the start-up of the servcie
case LinkStat:
// When stats tag has id="link", the following definitions are valid:
// stat.Total: Connections since start-up.
// stat.In: Bytes received
// stat.Out: Bytes sent

// Note that stat.Total is the total connections since the start-up of the service
// So we just want to make sure here that no negative value is present
incBy := float64(stat.LinkConnections - lastStats.LinkConnections)
if stat.LinkConnections < lastStats.LinkConnections {
incBy = float64(stat.LinkConnections)
incBy := float64(stat.Total - lastStats.Total)
if stat.Total < lastStats.Total {
incBy = float64(stat.Total)
}
Connections.Add(incBy)
lastStats.LinkConnections = stat.LinkConnections
lastStats.Total = stat.Total

incBy = float64(stat.LinkInBytes - lastStats.LinkInBytes)
if stat.LinkInBytes < lastStats.LinkInBytes {
incBy = float64(stat.LinkInBytes)
incBy = float64(stat.In - lastStats.In)
if stat.In < lastStats.In {
incBy = float64(stat.In)
}
BytesXfer.With(prometheus.Labels{"direction": "rx"}).Add(incBy)
lastStats.LinkInBytes = stat.LinkInBytes
lastStats.In = stat.In

incBy = float64(stat.LinkOutBytes - lastStats.LinkOutBytes)
if stat.LinkOutBytes < lastStats.LinkOutBytes {
incBy = float64(stat.LinkOutBytes)
incBy = float64(stat.Out - lastStats.Out)
if stat.Out < lastStats.Out {
incBy = float64(stat.Out)
}
BytesXfer.With(prometheus.Labels{"direction": "tx"}).Add(incBy)
lastStats.LinkOutBytes = stat.LinkOutBytes
case "sched":
Threads.With(prometheus.Labels{"state": "idle"}).Set(float64(stat.ThreadsIdle))
lastStats.Out = stat.Out
case SchedStat:
Threads.With(prometheus.Labels{"state": "idle"}).Set(float64(stat.Idle))
Threads.With(prometheus.Labels{"state": "running"}).Set(float64(stat.Threads -
stat.ThreadsIdle))
stat.Idle))
case OssStat: // Oss stat should only appear on origin servers
for _, pathStat := range stat.Paths.Stats {
noQuoteLp := strings.Replace(pathStat.Lp, "\"", "", 2)
// pathStat.Total is in kilobytes but we want to standardize all data to bytes
StorageVolume.With(prometheus.Labels{"ns": noQuoteLp, "type": "total", "server_type": "origin"}).
Set(float64(pathStat.Total * 1024))
StorageVolume.With(prometheus.Labels{"ns": noQuoteLp, "type": "free", "server_type": "origin"}).
Set(float64(pathStat.Free * 1024))
}
case CacheStat:
cacheStore := stat.Store
StorageVolume.With(prometheus.Labels{"ns": "/cache", "type": "total", "server_type": "cache"}).
Set(float64(cacheStore.Size))
StorageVolume.With(prometheus.Labels{"ns": "/cache", "type": "free", "server_type": "cache"}).
Set(float64(cacheStore.Size - cacheStore.Used))
}
}
return nil
Expand Down
30 changes: 15 additions & 15 deletions metrics/xrootd_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,9 @@ func TestHandlePacket(t *testing.T) {
Program: "xrootd",
Stats: []SummaryStat{
{
Id: "sched",
Threads: 10,
ThreadsIdle: 8,
Id: "sched",
Threads: 10,
Idle: 8,
},
},
}
Expand Down Expand Up @@ -265,10 +265,10 @@ func TestHandlePacket(t *testing.T) {
Program: "xrootd",
Stats: []SummaryStat{
{
Id: "link",
LinkConnections: 9,
LinkInBytes: 99,
LinkOutBytes: 999,
Id: "link",
Total: 9,
In: 99,
Out: 999,
},
},
}
Expand All @@ -277,10 +277,10 @@ func TestHandlePacket(t *testing.T) {
Program: "xrootd",
Stats: []SummaryStat{
{
Id: "link",
LinkConnections: 10,
LinkInBytes: 100,
LinkOutBytes: 1000,
Id: "link",
Total: 10,
In: 100,
Out: 1000,
},
},
}
Expand All @@ -289,10 +289,10 @@ func TestHandlePacket(t *testing.T) {
Program: "cmsd",
Stats: []SummaryStat{
{
Id: "link",
LinkConnections: 2,
LinkInBytes: 0,
LinkOutBytes: 0,
Id: "link",
Total: 2,
In: 0,
Out: 0,
},
},
}
Expand Down
Loading