From 72eba2818541094e58dde43c0a7b68ea87095beb Mon Sep 17 00:00:00 2001 From: Simon Murray Date: Thu, 5 Sep 2024 16:47:29 +0100 Subject: [PATCH] Add Token Caching (#115) Transpires that validating tokens is very expensive, so add a cache to identity to speed this up. --- charts/identity/Chart.yaml | 4 ++-- pkg/middleware/openapi/remote/authorizer.go | 20 +++++++++++++++++++ pkg/oauth2/oauth2.go | 22 ++++++++++++++++----- pkg/oauth2/oauth2_test.go | 1 + pkg/oauth2/tokens.go | 15 ++++++++++++++ 5 files changed, 55 insertions(+), 7 deletions(-) diff --git a/charts/identity/Chart.yaml b/charts/identity/Chart.yaml index fa253698..f5842e81 100644 --- a/charts/identity/Chart.yaml +++ b/charts/identity/Chart.yaml @@ -4,8 +4,8 @@ description: A Helm chart for deploying Unikorn's IdP type: application -version: v0.2.35 -appVersion: v0.2.35 +version: v0.2.36 +appVersion: v0.2.36 icon: https://raw.githubusercontent.com/unikorn-cloud/assets/main/images/logos/dark-on-light/icon.png diff --git a/pkg/middleware/openapi/remote/authorizer.go b/pkg/middleware/openapi/remote/authorizer.go index 63c8c786..14fb82f7 100644 --- a/pkg/middleware/openapi/remote/authorizer.go +++ b/pkg/middleware/openapi/remote/authorizer.go @@ -22,6 +22,7 @@ import ( "net/http" "strconv" "strings" + "time" "github.com/coreos/go-oidc/v3/oidc" "github.com/getkin/kin-openapi/openapi3filter" @@ -33,6 +34,8 @@ import ( "github.com/unikorn-cloud/identity/pkg/middleware/openapi" identityapi "github.com/unikorn-cloud/identity/pkg/openapi" + "k8s.io/apimachinery/pkg/util/cache" + "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -41,6 +44,9 @@ type Authorizer struct { client client.Client options *identityclient.Options clientOptions *coreclient.HTTPClientOptions + // tokenCache is used to enhance interaction as the validation is a + // very expensive operation. 
+ tokenCache *cache.LRUExpireCache } var _ openapi.Authorizer = &Authorizer{} @@ -51,6 +57,9 @@ func NewAuthorizer(client client.Client, options *identityclient.Options, client client: client, options: options, clientOptions: clientOptions, + // TODO: make this configurable, possibly even a shared flag with the + // authorizer to maintain consistency. + tokenCache: cache.NewLRUExpireCache(4096), } } @@ -121,6 +130,15 @@ func (a *Authorizer) authorizeOAuth2(r *http.Request) (string, *identityapi.User return "", nil, errors.OAuth2InvalidRequest("authorization scheme not allowed").WithValues("scheme", authorizationScheme) } + if value, ok := a.tokenCache.Get(rawToken); ok { + claims, ok := value.(*identityapi.Userinfo) + if !ok { + return "", nil, errors.OAuth2ServerError("invalid token cache data") + } + + return rawToken, claims, nil + } + // The identity client neatly wraps up TLS... identity := identityclient.New(a.client, a.options, a.clientOptions) @@ -171,6 +189,8 @@ func (a *Authorizer) authorizeOAuth2(r *http.Request) (string, *identityapi.User return "", nil, errors.OAuth2ServerError("failed to extrac user information").WithError(err) } + a.tokenCache.Add(rawToken, claims, time.Until(time.Unix(int64(*claims.Exp), 0))) + return rawToken, claims, nil } diff --git a/pkg/oauth2/oauth2.go b/pkg/oauth2/oauth2.go index 8f98ae3b..a26d449d 100644 --- a/pkg/oauth2/oauth2.go +++ b/pkg/oauth2/oauth2.go @@ -51,6 +51,8 @@ import ( "github.com/unikorn-cloud/identity/pkg/rbac" "github.com/unikorn-cloud/identity/pkg/util" + "k8s.io/apimachinery/pkg/util/cache" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -71,6 +73,10 @@ type Options struct { // lifetime so we can "guarantee" ours will expire before theirs and force // a refresh before any errors can come from the IdP. TokenLeewayDuration time.Duration + + // TokenCacheSize is used to control the size of the LRU cache for token validation + // checks. 
This bounds the memory use to prevent DoS attacks. + TokenCacheSize int } func (o *Options) AddFlags(f *pflag.FlagSet) { @@ -78,6 +84,7 @@ func (o *Options) AddFlags(f *pflag.FlagSet) { f.DurationVar(&o.RefreshTokenDuration, "refresh-token-duration", 0, "Maximum time a refresh token can be active for.") f.DurationVar(&o.TokenVerificationLeeway, "token-verification-leeway", 0, "How mush leeway to permit for verification of token validity.") f.DurationVar(&o.TokenLeewayDuration, "token-leeway", time.Minute, "How long to remove from the provider token expiry to account for network and processing latency.") + f.IntVar(&o.TokenCacheSize, "token-cache-size", 8192, "How many token cache entries to allow.") } // Authenticator provides Keystone authentication functionality. @@ -92,17 +99,22 @@ type Authenticator struct { issuer *jose.JWTIssuer rbac *rbac.RBAC + + // tokenCache is used to enhance interaction as the validation is a + // very expensive operation. + tokenCache *cache.LRUExpireCache } // New returns a new authenticator with required fields populated. // You must call AddFlags after this. 
func New(options *Options, namespace string, client client.Client, issuer *jose.JWTIssuer, rbac *rbac.RBAC) *Authenticator { return &Authenticator{ - options: options, - namespace: namespace, - client: client, - issuer: issuer, - rbac: rbac, + options: options, + namespace: namespace, + client: client, + issuer: issuer, + rbac: rbac, + tokenCache: cache.NewLRUExpireCache(options.TokenCacheSize), } } diff --git a/pkg/oauth2/oauth2_test.go b/pkg/oauth2/oauth2_test.go index dc2b7dce..3ad77dfb 100644 --- a/pkg/oauth2/oauth2_test.go +++ b/pkg/oauth2/oauth2_test.go @@ -61,6 +61,7 @@ func TestTokens(t *testing.T) { AccessTokenDuration: accessTokenDuration, RefreshTokenDuration: refreshTokenDuration, TokenLeewayDuration: accessTokenDuration, + TokenCacheSize: 1024, } authenticator := oauth2.New(options, josetesting.Namespace, client, issuer, nil) diff --git a/pkg/oauth2/tokens.go b/pkg/oauth2/tokens.go index 4ebf8c6f..f9d78412 100644 --- a/pkg/oauth2/tokens.go +++ b/pkg/oauth2/tokens.go @@ -198,6 +198,19 @@ type VerifyInfo struct { // Verify checks the access token parses and validates. func (a *Authenticator) Verify(ctx context.Context, info *VerifyInfo) (*AccessTokenClaims, error) { + // The verification process is very expensive, so we add a cache in here to + // improve interactivity. Once this is in place, then the network latency becomes + // the bottleneck, presumably this is the TLS handshake. Similar code can be found + // in the remote client-side verification middleware. + if value, ok := a.tokenCache.Get(info.Token); ok { + claims, ok := value.(*AccessTokenClaims) + if !ok { + return nil, fmt.Errorf("%w: failed to assert cache claims", ErrTokenVerification) + } + + return claims, nil + } + // Parse and verify the claims with the public key. 
claims := &AccessTokenClaims{} @@ -218,5 +231,7 @@ func (a *Authenticator) Verify(ctx context.Context, info *VerifyInfo) (*AccessTo return nil, fmt.Errorf("failed to validate claims: %w", err) } + a.tokenCache.Add(info.Token, claims, time.Until(claims.Expiry.Time())) + return claims, nil }