@@ -22,17 +22,14 @@ const (
2222 OP_ERROR_LOGS = "error_logs"
2323)
2424
25- // RDS log format regex pattern to validate log line structure
26- // Expected format: %m:%r:%u@%d:[%p]:%l:%e:%s:%v:%x:%c:%q%a
27- // Example: 2026-02-02 21:35:40.130 UTC:10.24.155.141(34110):mybooks-app@books_store:[32032]:2:40001:2026-02-02 21:33:19 UTC:...
28- // Note: Timezone can be any 3-4 letter abbreviation (UTC, GMT, EST, PST, etc.)
25+ // RDS log format: %m:%r:%u@%d:[%p]:%l:%e:%s:%v:%x:%c:%q%a
2926var rdsLogFormatRegex = regexp .MustCompile (
30- `^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d{3})? [A-Z]{3,4}:` + // timestamp (%m) with timezone
31- `[^:]+:` + // host:port (%r)
32- `[^@]+@[^:]+:` + // user@database (%u@%d)
33- `\[\d*\]:` + // [pid] (%p) - may be empty if not available
34- `\d+:` + // line number (%l)
35- `[A-Z0-9]{5}:` , // SQLSTATE (%e) - exactly 5 alphanumeric chars
27+ `^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d{3})? [A-Z]{3,4}:` +
28+ `[^:]+:` +
29+ `[^@]+@[^:]+:` +
30+ `\[\d*\]:` +
31+ `\d+:` +
32+ `[A-Z0-9]{5}:` ,
3633)
3734
3835var supportedSeverities = map [string ]bool {
@@ -41,28 +38,6 @@ var supportedSeverities = map[string]bool{
4138 "PANIC" : true ,
4239}
4340
44- // PostgreSQL Text Log Format (stderr) - RDS Format
45- //
46- // RDS log_line_prefix format: %m:%r:%u@%d:[%p]:%l:%e:%s:%v:%x:%c:%q%a
47- //
48- // Example log line:
49- // 2025-01-12 10:30:45 UTC:10.0.1.5:54321:app-user@books_store:[9112]:4:57014:2025-01-12 10:29:15 UTC:25/112:0:693c34cb.2398::psqlERROR: canceling statement
50- //
51- // Field mapping:
52- // %m - Timestamp with milliseconds (e.g., "2025-01-12 10:30:45 UTC")
53- // %r - Remote host:port (e.g., "10.0.1.5:54321" or "[local]")
54- // %u@%d - User@Database (e.g., "app-user@books_store")
55- // [%p] - Process ID in brackets (e.g., "[9112]")
56- // %l - Session line number
57- // %e - SQLSTATE error code
58- // %s - Session start timestamp
59- // %v - Virtual transaction ID
60- // %x - Transaction ID
61- // %c - Session ID
62- // %q - Query text (usually empty)
63- // %a - Application name
64- // Message - Log message (severity: message text)
65-
6641type ParsedError struct {
6742 ErrorSeverity string
6843 SQLStateCode string
@@ -98,7 +73,6 @@ type ErrorLogs struct {
9873 stopped * atomic.Bool
9974 wg sync.WaitGroup
10075
101- // Format validation tracking (for rate-limited warnings)
10276 formatCheckMutex sync.Mutex
10377 lastFormatWarning time.Time
10478 validLogsThisMinute int
@@ -204,8 +178,7 @@ func (c *ErrorLogs) processLogLine(entry loki.Entry) error {
204178 return c .parseTextLog (entry )
205179}
206180
207- // parseTextLog extracts fields from stderr text format logs for metrics.
208- // Parses RDS format: %m:%r:%u@%d:[%p]:%l:%e:%s:%v:%x:%c:%q%a
181+ // parseTextLog extracts fields from stderr text format logs for metrics.
209182func (c * ErrorLogs ) parseTextLog (entry loki.Entry ) error {
210183 line := entry .Entry .Line
211184
@@ -236,11 +209,9 @@ func (c *ErrorLogs) parseTextLog(entry loki.Entry) error {
236209 return fmt .Errorf ("log line does not match expected RDS format" )
237210 }
238211
239- // Track that we've seen a valid format
240212 c .trackValidFormat ()
241213
242214 // Parse RDS format: %m:%r:%u@%d:[%p]:%l:%e:%s:%v:%x:%c:%q%a
243- // Format already validated by regex, so we can safely extract fields
244215 atIdx := strings .Index (line , "@" )
245216 afterAt := line [atIdx + 1 :]
246217 pidMarkerIdx := strings .Index (afterAt , ":[" )
@@ -293,8 +264,7 @@ func (c *ErrorLogs) parseTextLog(entry loki.Entry) error {
293264 return nil
294265}
295266
296- // isContinuationLine checks if a line is part of a multi-line PostgreSQL error.
297- // Returns true for tab-indented lines or lines starting with DETAIL, HINT, etc.
267+ // isContinuationLine checks if a line is part of a multi-line PostgreSQL error.
298268func isContinuationLine (line string ) bool {
299269 if strings .HasPrefix (line , "\t " ) {
300270 return true
@@ -342,8 +312,7 @@ func (c *ErrorLogs) updateMetrics(parsed *ParsedError) {
342312 ).Inc ()
343313}
344314
345- // UpdateSystemID updates the system ID used in metrics labels.
346- // This is thread-safe and can be called while the collector is running.
315+ // UpdateSystemID updates the system ID used in metrics labels.
347316func (c * ErrorLogs ) UpdateSystemID (systemID string ) {
348317 c .systemIDMutex .Lock ()
349318 defer c .systemIDMutex .Unlock ()
@@ -364,17 +333,16 @@ func (c *ErrorLogs) trackValidFormat() {
364333 c .validLogsThisMinute ++
365334}
366335
367- // trackInvalidFormat tracks invalid format and emits warning if ALL logs in past minute were invalid
336+ // trackInvalidFormat counts an invalid-format log line and, at most once per minute, emits a warning if every log line counted in that window was invalid.
368337func (c * ErrorLogs ) trackInvalidFormat () {
369338 c .formatCheckMutex .Lock ()
370339 defer c .formatCheckMutex .Unlock ()
371340
372341 c .invalidLogsThisMinute ++
373342
374- // Check if we should emit a warning (once per minute)
343+ // Emit warning once per minute if ALL logs were invalid
375344 now := time .Now ()
376345 if now .Sub (c .lastFormatWarning ) >= time .Minute {
377- // Only warn if ALL logs in this window were invalid
378346 if c .validLogsThisMinute == 0 && c .invalidLogsThisMinute > 0 {
379347 level .Warn (c .logger ).Log (
380348 "msg" , "all PostgreSQL error logs in the last minute had invalid format" ,
@@ -384,7 +352,6 @@ func (c *ErrorLogs) trackInvalidFormat() {
384352 )
385353 }
386354
387- // Reset counters for next minute window
388355 c .lastFormatWarning = now
389356 c .validLogsThisMinute = 0
390357 c .invalidLogsThisMinute = 0
0 commit comments