From 583a3cbf9f428ecdd487ad6676465db4b2956a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Tue, 6 Aug 2024 23:39:06 +0100 Subject: [PATCH] ociregistry: deduplicate and lazily compile global regexes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cmd/cue imports the ociregistry and ociregistry/ociref packages, which cost about 0.1ms and 0.04ms at init time respectively, due to how they compile a few regular expressions at init time. First, two of these are duplicates; ociregistry.IsValidRepoName can be replaced by ociref.IsValidRepository, and ociregistry.IsValidTag can be replaced by ociref.IsValidTag. In both cases, ociref's regular expressions are better documented. Second, ociregistry.IsValidDigest is not a duplicate, but it seems to belong to ociref alongside all the other IsValid funcs, plus ociref already performs similar validity checks in existing APIs. Note that the changes above are a breaking change, but users should be able to switch to the APIs in ociref fairly easily. Third, hide the three global regexp.MustCompile calls in ociref behind sync.OnceValue, meaning that the regular expressions only get compiled when they are first used. With all of the changes combined, we save about 0.15ms and 2300 allocs at init time, which should be small but noticeable for cmd/cue. goos: linux goarch: amd64 cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics │ old │ new │ │ sec/op │ sec/op vs base │ CuelabsDevGoOciOciregistry 108690.0n ± 5% 103.0n ± 73% -99.91% (p=0.002 n=6) CuelabsDevGoOciOciregistryOciref 46316.50n ± 4% 19.00n ± 195% -99.96% (p=0.002 n=6) geomean 70.95µ 44.24n -99.94% │ old │ new │ │ B/op │ B/op vs base │ CuelabsDevGoOciOciregistry 130259.0 ± 0% 947.5 ± 0% -99.27% (p=0.002 n=6) CuelabsDevGoOciOciregistryOciref 61912.0 ± 0% 480.0 ± 0% -99.22% (p=0.002 n=6) geomean 87.70Ki 674.4 -99.25% │ old │ new │ │ allocs/op │ allocs/op vs base │ CuelabsDevGoOciOciregistry 1855.000 ± 0% 2.000 ± 0% -99.89% (p=0.002 n=6) CuelabsDevGoOciOciregistryOciref 442.00 ± 0% 21.00 ± 0% -95.25% (p=0.002 n=6) geomean 905.5 6.481 -99.28% Signed-off-by: Daniel Martí Change-Id: I1be320904649374f768d392c7a104cd2f3c5021e Dispatch-Trailer: {"type":"trybot","CL":1198992,"patchset":1,"ref":"refs/changes/92/1198992/1","targetBranch":"main"} --- ociregistry/internal/ocirequest/request.go | 29 +++++++-------- ociregistry/ociclient/client.go | 3 +- ociregistry/ocimem/registry.go | 3 +- ociregistry/ocimem/writer.go | 3 +- ociregistry/ociref/reference.go | 42 ++++++++++++++-------- ociregistry/valid.go | 30 ---------------- 6 files changed, 49 insertions(+), 61 deletions(-) delete mode 100644 ociregistry/valid.go diff --git a/ociregistry/internal/ocirequest/request.go b/ociregistry/internal/ocirequest/request.go index 1bb19b58..99b28157 100644 --- a/ociregistry/internal/ocirequest/request.go +++ b/ociregistry/internal/ocirequest/request.go @@ -24,6 +24,7 @@ import ( "unicode/utf8" "cuelabs.dev/go/oci/ociregistry" + "cuelabs.dev/go/oci/ociregistry/ociref" ) // ParseError represents an error that can happen when parsing. @@ -225,7 +226,7 @@ func parse(method string, u *url.URL) (*Request, error) { } if ok { rreq.Repo = uploadPath - if !ociregistry.IsValidRepoName(rreq.Repo) { + if !ociref.IsValidRepository(rreq.Repo) { return nil, ociregistry.ErrNameInvalid } if method != "POST" { @@ -234,7 +235,7 @@ func parse(method string, u *url.URL) (*Request, error) { if d := urlq.Get("mount"); d != "" { // end-11 rreq.Digest = d - if !ociregistry.IsValidDigest(rreq.Digest) { + if !ociref.IsValidDigest(rreq.Digest) { return nil, ociregistry.ErrDigestInvalid } rreq.FromRepo = urlq.Get("from") @@ -246,7 +247,7 @@ func parse(method string, u *url.URL) (*Request, error) { rreq.Digest = "" return &rreq, nil } - if !ociregistry.IsValidRepoName(rreq.FromRepo) { + if !ociref.IsValidRepository(rreq.FromRepo) { return nil, ociregistry.ErrNameInvalid } rreq.Kind = ReqBlobMount @@ -255,7 +256,7 @@ func parse(method string, u *url.URL) (*Request, error) { if d := urlq.Get("digest"); d != "" { // end-4b rreq.Digest = d - if !ociregistry.IsValidDigest(d) { + if !ociref.IsValidDigest(d) { return nil, ErrBadlyFormedDigest } rreq.Kind = ReqBlobUploadBlob @@ -276,10 +277,10 @@ func parse(method string, u *url.URL) (*Request, error) { switch lastButOne { case "blobs": rreq.Repo = path - if !ociregistry.IsValidDigest(last) { + if !ociref.IsValidDigest(last) { return nil, ErrBadlyFormedDigest } - if !ociregistry.IsValidRepoName(rreq.Repo) { + if !ociref.IsValidRepository(rreq.Repo) { return nil, ociregistry.ErrNameInvalid } rreq.Digest = last @@ -302,7 +303,7 @@ func parse(method string, u *url.URL) (*Request, error) { return nil, ErrNotFound } rreq.Repo = repo - if !ociregistry.IsValidRepoName(rreq.Repo) { + if !ociref.IsValidRepository(rreq.Repo) { return nil, ociregistry.ErrNameInvalid } uploadID64 := last @@ -326,7 +327,7 @@ func parse(method string, u *url.URL) (*Request, error) { case "PUT": rreq.Kind = ReqBlobCompleteUpload rreq.Digest = urlq.Get("digest") - if !ociregistry.IsValidDigest(rreq.Digest) { + if !ociref.IsValidDigest(rreq.Digest) { return nil, ErrBadlyFormedDigest } default: @@ -335,13 +336,13 @@ func parse(method string, u *url.URL) (*Request, error) { return &rreq, nil case "manifests": rreq.Repo = path - if !ociregistry.IsValidRepoName(rreq.Repo) { + if !ociref.IsValidRepository(rreq.Repo) { return nil, ociregistry.ErrNameInvalid } switch { - case ociregistry.IsValidDigest(last): + case ociref.IsValidDigest(last): rreq.Digest = last - case ociregistry.IsValidTag(last): + case ociref.IsValidTag(last): rreq.Tag = last default: return nil, ErrNotFound @@ -371,20 +372,20 @@ func parse(method string, u *url.URL) (*Request, error) { return nil, ErrMethodNotAllowed } rreq.Repo = path - if !ociregistry.IsValidRepoName(rreq.Repo) { + if !ociref.IsValidRepository(rreq.Repo) { return nil, ociregistry.ErrNameInvalid } rreq.Kind = ReqTagsList return &rreq, nil case "referrers": - if !ociregistry.IsValidDigest(last) { + if !ociref.IsValidDigest(last) { return nil, ErrBadlyFormedDigest } if method != "GET" { return nil, ErrMethodNotAllowed } rreq.Repo = path - if !ociregistry.IsValidRepoName(rreq.Repo) { + if !ociref.IsValidRepository(rreq.Repo) { return nil, ociregistry.ErrNameInvalid } // TODO is there any kind of pagination for referrers? diff --git a/ociregistry/ociclient/client.go b/ociregistry/ociclient/client.go index bc78b328..5c3a39f2 100644 --- a/ociregistry/ociclient/client.go +++ b/ociregistry/ociclient/client.go @@ -35,6 +35,7 @@ import ( "cuelabs.dev/go/oci/ociregistry" "cuelabs.dev/go/oci/ociregistry/internal/ocirequest" "cuelabs.dev/go/oci/ociregistry/ociauth" + "cuelabs.dev/go/oci/ociregistry/ociref" ) // debug enables logging. @@ -166,7 +167,7 @@ func descriptorFromResponse(resp *http.Response, knownDigest digest.Digest, requ } digest := digest.Digest(resp.Header.Get("Docker-Content-Digest")) if digest != "" { - if !ociregistry.IsValidDigest(string(digest)) { + if !ociref.IsValidDigest(string(digest)) { return ociregistry.Descriptor{}, fmt.Errorf("bad digest %q found in response", digest) } } else { diff --git a/ociregistry/ocimem/registry.go b/ociregistry/ocimem/registry.go index 3a99caae..40b3dbd7 100644 --- a/ociregistry/ocimem/registry.go +++ b/ociregistry/ocimem/registry.go @@ -21,6 +21,7 @@ import ( "sync" "cuelabs.dev/go/oci/ociregistry" + "cuelabs.dev/go/oci/ociregistry/ociref" "github.com/opencontainers/go-digest" ) @@ -120,7 +121,7 @@ func (r *Registry) blobForDigest(repoName string, dig ociregistry.Digest) (*blob } func (r *Registry) makeRepo(repoName string) (*repository, error) { - if !ociregistry.IsValidRepoName(repoName) { + if !ociref.IsValidRepository(repoName) { return nil, ociregistry.ErrNameInvalid } if r.repos == nil { diff --git a/ociregistry/ocimem/writer.go b/ociregistry/ocimem/writer.go index 0b298900..a20517c6 100644 --- a/ociregistry/ocimem/writer.go +++ b/ociregistry/ocimem/writer.go @@ -20,6 +20,7 @@ import ( "io" "cuelabs.dev/go/oci/ociregistry" + "cuelabs.dev/go/oci/ociregistry/ociref" "github.com/opencontainers/go-digest" ) @@ -107,7 +108,7 @@ func (r *Registry) PushManifest(ctx context.Context, repoName string, tag string Size: int64(len(data)), } if tag != "" { - if !ociregistry.IsValidTag(tag) { + if !ociref.IsValidTag(tag) { return ociregistry.Descriptor{}, fmt.Errorf("invalid tag") } if r.cfg.ImmutableTags { diff --git a/ociregistry/ociref/reference.go b/ociregistry/ociref/reference.go index 650b041a..04e51fcf 100644 --- a/ociregistry/ociref/reference.go +++ b/ociregistry/ociref/reference.go @@ -19,8 +19,10 @@ import ( "fmt" "regexp" "strings" + "sync" "cuelabs.dev/go/oci/ociregistry" + "github.com/opencontainers/go-digest" ) // The following regular expressions derived from code in the @@ -91,17 +93,23 @@ const ( repoName = pathComponent + `(?:` + `/` + pathComponent + `)*` ) -var referencePat = regexp.MustCompile( - `^(?:` + - `(?:` + `(` + domainAndPort + `)` + `/` + `)?` + // capture 1: host - `(` + repoName + `)` + // capture 2: repository name - `(?:` + `:([^@]+))?` + // capture 3: tag; rely on Go logic to test validity. - `(?:` + `@(.+))?` + // capture 4: digest; rely on go-digest to find issues - `)$`, -) - -var hostPat = regexp.MustCompile(`^(?:` + domainAndPort + `)$`) -var repoPat = regexp.MustCompile(`^(?:` + repoName + `)$`) +var referencePat = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile( + `^(?:` + + `(?:` + `(` + domainAndPort + `)` + `/` + `)?` + // capture 1: host + `(` + repoName + `)` + // capture 2: repository name + `(?:` + `:([^@]+))?` + // capture 3: tag; rely on Go logic to test validity. + `(?:` + `@(.+))?` + // capture 4: digest; rely on go-digest to find issues + `)$`, + ) +}) + +var hostPat = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(`^(?:` + domainAndPort + `)$`) +}) +var repoPat = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(`^(?:` + repoName + `)$`) +}) // Reference represents an entry in an OCI repository. type Reference struct { @@ -125,13 +133,13 @@ type Reference struct { // IsValidHost reports whether s is a valid host (or host:port) part of a reference string. func IsValidHost(s string) bool { - return hostPat.MatchString(s) + return hostPat().MatchString(s) } // IsValidHost reports whether s is a valid repository part // of a reference string. func IsValidRepository(s string) bool { - return repoPat.MatchString(s) + return repoPat().MatchString(s) } // IsValidTag reports whether s is a valid reference tag. @@ -139,6 +147,12 @@ func IsValidTag(s string) bool { return checkTag(s) == nil } +// IsValidDigest reports whether the digest d is well formed. +func IsValidDigest(d string) bool { + _, err := digest.Parse(d) + return err == nil +} + // Parse parses a reference string that must include // a host name (or host:port pair) component. // @@ -165,7 +179,7 @@ func Parse(refStr string) (Reference, error) { // Unlike "docker pull" however, there is no default registry: when // presented with a bare repository name, the Host field will be empty. func ParseRelative(refStr string) (Reference, error) { - m := referencePat.FindStringSubmatch(refStr) + m := referencePat().FindStringSubmatch(refStr) if m == nil { return Reference{}, fmt.Errorf("invalid reference syntax (%q)", refStr) } diff --git a/ociregistry/valid.go b/ociregistry/valid.go deleted file mode 100644 index 7486fa19..00000000 --- a/ociregistry/valid.go +++ /dev/null @@ -1,30 +0,0 @@ -package ociregistry - -import ( - "regexp" - - "github.com/opencontainers/go-digest" -) - -var ( - tagPattern = regexp.MustCompile(`^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$`) - repoNamePattern = regexp.MustCompile(`^[a-z0-9]+([._-][a-z0-9]+)*(/[a-z0-9]+([._-][a-z0-9]+)*)*$`) -) - -// IsValidRepoName reports whether the given repository -// name is valid according to the specification. -func IsValidRepoName(repoName string) bool { - return repoNamePattern.MatchString(repoName) -} - -// IsValidTag reports whether the digest d is valid -// according to the specification. -func IsValidTag(tag string) bool { - return tagPattern.MatchString(tag) -} - -// IsValidDigest reports whether the digest d is well formed. -func IsValidDigest(d string) bool { - _, err := digest.Parse(d) - return err == nil -}