@@ -19,19 +19,23 @@ package gerrit
19
19
20
20
import (
21
21
"context"
22
+ "errors"
22
23
"fmt"
23
24
"sort"
24
25
"strconv"
25
26
"strings"
26
27
"time"
27
28
29
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
30
+
28
31
"github.com/andygrunwald/go-gerrit"
29
32
"github.com/sirupsen/logrus"
30
33
ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client"
31
34
"sigs.k8s.io/controller-runtime/pkg/reconcile"
32
35
33
36
v1 "k8s.io/test-infra/prow/apis/prowjobs/v1"
34
37
"k8s.io/test-infra/prow/config"
38
+ "k8s.io/test-infra/prow/crier/reporters/criercommonlib"
35
39
"k8s.io/test-infra/prow/gerrit/client"
36
40
"k8s.io/test-infra/prow/kube"
37
41
)
@@ -79,8 +83,9 @@ type gerritClient interface {
79
83
80
84
// Client is a gerrit reporter client
81
85
type Client struct {
82
- gc gerritClient
83
- lister ctrlruntimeclient.Reader
86
+ gc gerritClient
87
+ pjclientset ctrlruntimeclient.Client
88
+ prLocks * criercommonlib.ShardedLock
84
89
}
85
90
86
91
// Job is the view of a prowjob scoped for a report
@@ -101,7 +106,7 @@ type JobReport struct {
101
106
}
102
107
103
108
// NewReporter returns a reporter client
104
- func NewReporter (cfg config.Getter , cookiefilePath string , projects map [string ][]string , lister ctrlruntimeclient.Reader ) (* Client , error ) {
109
+ func NewReporter (cfg config.Getter , cookiefilePath string , projects map [string ][]string , pjclientset ctrlruntimeclient.Client ) (* Client , error ) {
105
110
gc , err := client .NewClient (projects )
106
111
if err != nil {
107
112
return nil , err
@@ -118,10 +123,14 @@ func NewReporter(cfg config.Getter, cookiefilePath string, projects map[string][
118
123
// line arg(which is going to be deprecated).
119
124
gc .Authenticate (cookiefilePath , "" )
120
125
121
- return & Client {
122
- gc : gc ,
123
- lister : lister ,
124
- }, nil
126
+ c := & Client {
127
+ gc : gc ,
128
+ pjclientset : pjclientset ,
129
+ prLocks : criercommonlib .NewShardedLock (),
130
+ }
131
+
132
+ c .prLocks .RunCleanup ()
133
+ return c , nil
125
134
}
126
135
127
136
func applyGlobalConfig (cfg config.Getter , gerritClient * client.Client , cookiefilePath string ) {
@@ -200,7 +209,7 @@ func (c *Client) ShouldReport(ctx context.Context, log *logrus.Entry, pj *v1.Pro
200
209
}
201
210
202
211
var pjs v1.ProwJobList
203
- if err := c .lister .List (ctx , & pjs , ctrlruntimeclient .MatchingLabels (selector )); err != nil {
212
+ if err := c .pjclientset .List (ctx , & pjs , ctrlruntimeclient .MatchingLabels (selector )); err != nil {
204
213
log .WithError (err ).Errorf ("Cannot list prowjob with selector %v" , selector )
205
214
return false
206
215
}
@@ -253,6 +262,50 @@ func (c *Client) ShouldReport(ctx context.Context, log *logrus.Entry, pj *v1.Pro
253
262
// Report will send the current prowjob status as a gerrit review
254
263
func (c * Client ) Report (ctx context.Context , logger * logrus.Entry , pj * v1.ProwJob ) ([]* v1.ProwJob , * reconcile.Result , error ) {
255
264
logger = logger .WithFields (logrus.Fields {"job" : pj .Spec .Job , "name" : pj .Name })
265
+
266
+ // Gerrit reporter hasn't learned how to deduplicate itself from report yet,
267
+ // will need to block here. Unfortunately need to check after this section
268
+ // to ensure that the job was not already marked reported by other threads
269
+ // TODO(chaodaiG): postsubmit job technically doesn't know which PR it's
270
+ // from, currently it's associated with a PR in gerrit in a weird way, which
271
+ // needs to be fixed in
272
+ // https://github.com/kubernetes/test-infra/issues/22653, remove the
273
+ // PostsubmitJob check once it's fixed
274
+ if pj .Spec .Type == v1 .PresubmitJob || pj .Spec .Type == v1 .PostsubmitJob {
275
+ key , err := lockKeyForPJ (pj )
276
+ if err != nil {
277
+ return nil , nil , fmt .Errorf ("failed to get lockkey for job: %w" , err )
278
+ }
279
+ lock , err := c .prLocks .GetLock (ctx , * key )
280
+ if err != nil {
281
+ return nil , nil , err
282
+ }
283
+ if err := lock .Acquire (ctx , 1 ); err != nil {
284
+ return nil , nil , err
285
+ }
286
+ defer lock .Release (1 )
287
+
288
+ // In the case where several prow jobs from the same PR are finished one
289
+ // after another, by the time the lock is acquired, this job might have
290
+ // already been reported by another worker, refetch this pj to make sure
291
+ // that no duplicate report is produced
292
+ pjObjKey := ctrlruntimeclient .ObjectKeyFromObject (pj )
293
+ if err := c .pjclientset .Get (ctx , pjObjKey , pj ); err != nil {
294
+ if apierrors .IsNotFound (err ) {
295
+ // Job could be GC'ed or deleted for other reasons, not to
296
+ // report, this is not a prow error and should not be retried
297
+ logger .Debug ("object no longer exist" )
298
+ return nil , nil , nil
299
+ }
300
+
301
+ return nil , nil , fmt .Errorf ("failed to get prowjob %s: %w" , pjObjKey .String (), err )
302
+ }
303
+ if pj .Status .PrevReportStates [c .GetName ()] == pj .Status .State {
304
+ logger .Info ("Already reported by other threads." )
305
+ return nil , nil , nil
306
+ }
307
+ }
308
+
256
309
ctx , cancel := context .WithTimeout (ctx , 10 * time .Second )
257
310
defer cancel ()
258
311
@@ -276,7 +329,7 @@ func (c *Client) Report(ctx context.Context, logger *logrus.Entry, pj *v1.ProwJo
276
329
}
277
330
278
331
var pjsOnRevisionWithSameLabel v1.ProwJobList
279
- if err := c .lister .List (ctx , & pjsOnRevisionWithSameLabel , ctrlruntimeclient .MatchingLabels (selector )); err != nil {
332
+ if err := c .pjclientset .List (ctx , & pjsOnRevisionWithSameLabel , ctrlruntimeclient .MatchingLabels (selector )); err != nil {
280
333
logger .WithError (err ).WithField ("selector" , selector ).Errorf ("Cannot list prowjob with selector" )
281
334
return nil , nil , err
282
335
}
@@ -353,7 +406,22 @@ func (c *Client) Report(ctx context.Context, logger *logrus.Entry, pj *v1.ProwJo
353
406
}
354
407
355
408
logger .Infof ("Review Complete, reported jobs: %s" , jobNames (toReportJobs ))
356
- return toReportJobs , nil , nil
409
+
410
+ // If return here, the shardedLock will be released, and other threads that
411
+ // are from the same PR will still not understand that it's already
412
+ // reported, as the change of previous report state happens only after the
413
+ // returning of current function from the caller.
414
+ // Ideally the previous report state should be changed here.
415
+ logger .WithField ("job-count" , len (toReportJobs )).Info ("Reported job(s), now will update pj(s)." )
416
+ var err error
417
+ for _ , pjob := range toReportJobs {
418
+ if err = criercommonlib .UpdateReportStateWithRetries (ctx , pjob , logger , c .pjclientset , c .GetName ()); err != nil {
419
+ logger .WithError (err ).Error ("Failed to update report state on prowjob" )
420
+ }
421
+ }
422
+
423
+ // Let caller know that we are done with this job.
424
+ return nil , nil , err
357
425
}
358
426
359
427
func jobNames (jobs []* v1.ProwJob ) []string {
@@ -559,3 +627,18 @@ func ParseReport(message string) *JobReport {
559
627
// String implements fmt.Stringer for JobReport. It formats the report's
// Header and Message, in that order, with a line break between them.
func (r JobReport) String() string {
	const layout = "%s\n %s"
	return fmt.Sprintf(layout, r.Header, r.Message)
}
630
+
631
+ func lockKeyForPJ (pj * v1.ProwJob ) (* criercommonlib.SimplePull , error ) {
632
+ // TODO(chaodaiG): remove postsubmit once
633
+ // https://github.com/kubernetes/test-infra/issues/22653 is fixed
634
+ if pj .Spec .Type != v1 .PresubmitJob && pj .Spec .Type != v1 .PostsubmitJob {
635
+ return nil , fmt .Errorf ("can only get lock key for presubmit and postsubmit jobs, was %q" , pj .Spec .Type )
636
+ }
637
+ if pj .Spec .Refs == nil {
638
+ return nil , errors .New ("pj.Spec.Refs is nil" )
639
+ }
640
+ if n := len (pj .Spec .Refs .Pulls ); n != 1 {
641
+ return nil , fmt .Errorf ("prowjob doesn't have one but %d pulls" , n )
642
+ }
643
+ return criercommonlib .NewSimplePull (pj .Spec .Refs .Org , pj .Spec .Refs .Repo , pj .Spec .Refs .Pulls [0 ].Number ), nil
644
+ }
0 commit comments