Skip to content

Commit

Permalink
Merge pull request #649 from haoming29/registry-api-p4
Browse files Browse the repository at this point in the history
Take a URL from config value to fetch institution list for Registry
  • Loading branch information
haoming29 authored Jan 22, 2024
2 parents a719bc0 + 40d056b commit cb64873
Show file tree
Hide file tree
Showing 7 changed files with 545 additions and 6 deletions.
4 changes: 3 additions & 1 deletion config/resources/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ Origin:
EnableUI: true
EnableWrite: true
SelfTest: true
Registry:
InstitutionsUrlReloadMinutes: 15m
Monitoring:
PortLower: 9930
PortHigher: 9999
Expand All @@ -55,7 +57,7 @@ Transport:
ExpectContinueTimeout: 1s
ResponseHeaderTimeout: 10s
OIDC:
AuthorizationEndpoint: "https://cilogon.org/authorize"
AuthorizationEndpoint: "https://cilogon.org/authorize"
DeviceAuthEndpoint: "https://cilogon.org/oauth2/device_authorization"
TokenEndpoint: "https://cilogon.org/oauth2/token"
UserInfoEndpoint: "https://cilogon.org/oauth2/userinfo"
Expand Down
41 changes: 39 additions & 2 deletions docs/parameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ type: object
default: none
components: ["director"]
---

############################
# Log-Level Configs #
############################
Expand Down Expand Up @@ -206,7 +205,7 @@ name: Federation.TopologyReloadInterval
description: >-
The frequency, in minutes, that topology should be reloaded.
type: duration
osdf_default: 10
osdf_default: 10m
default: none
components: ["director", "nsregistry"]
---
Expand Down Expand Up @@ -600,10 +599,48 @@ description: >-
id: https://osg-htc.org/iid/01y2jtd41
```
Note that this value will take precedence over Registry.InstitutionsUrl if both are set
type: object
default: none
components: ["nsregistry"]
---
name: Registry.InstitutionsUrl
description: >-
A url to get a list of available institutions for users to register their namespaces to.
The url must accept a GET request with 200 response in JSON/YAML content with the following format:
JSON:
```JSON
[
{
"name": "University of Wisconsin - Madison",
"id": " https://osg-htc.org/iid/01y2jtd41"
}
]
```
YAML:
```yaml
---
- name: University of Wisconsin - Madison
id: " https://osg-htc.org/iid/01y2jtd41"
```
where the id field will be stored in registry database and must be unique, and name field will be displayed in UI as the option.
Note that Pelican will cache the response of the url in a TTL cache with default refresh time of 15 minutes.
Also note that Registry.Institutions will take precedence over this value if both are set.
type: url
default: none
components: ["nsregistry"]
---
name: Registry.InstitutionsUrlReloadMinutes
description: >-
Number of minutes that the Registry.InstitutionsUrl will be reloaded into the TTL cache.
type: duration
default: 15m
components: ["nsregistry"]
---
############################
# Server-level configs #
############################
Expand Down
3 changes: 3 additions & 0 deletions launchers/registry_serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ func RegistryServe(ctx context.Context, engine *gin.Engine, egrp *errgroup.Group
if err := web_ui.ConfigOAuthClientAPIs(engine); err != nil {
return err
}
if err := registry.InitInstConfig(ctx, egrp); err != nil {
return err
}
}

rootRouterGroup := engine.Group("/")
Expand Down
2 changes: 2 additions & 0 deletions param/parameters.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions param/parameters_struct.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

177 changes: 174 additions & 3 deletions registry/registry_ui.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,26 @@
package registry

import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"reflect"
"strconv"
"strings"
"sync"
"time"

"github.com/gin-gonic/gin"
"github.com/jellydator/ttlcache/v3"
"github.com/pelicanplatform/pelican/config"
"github.com/pelicanplatform/pelican/param"
"github.com/pelicanplatform/pelican/web_ui"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
)

type (
Expand All @@ -53,8 +60,8 @@ type (
}

Institution struct {
Name string `mapstructure:"name" json:"name"`
ID string `mapstructure:"id" json:"id"`
Name string `mapstructure:"name" json:"name" yaml:"name"`
ID string `mapstructure:"id" json:"id" yaml:"id"`
}
)

Expand All @@ -66,7 +73,9 @@ const (
)

var (
registrationFields []registrationField
registrationFields []registrationField
institutionsCache *ttlcache.Cache[string, []Institution]
institutionsCacheMutex = sync.RWMutex{}
)

func init() {
Expand Down Expand Up @@ -168,6 +177,88 @@ func excludePubKey(nss []*Namespace) (nssNew []NamespaceWOPubkey) {
return
}

func checkUniqueInstitutions(insts []Institution) bool {
repeatMap := make(map[string]bool)
for _, inst := range insts {
if repeatMap[inst.ID] {
return false
} else {
repeatMap[inst.ID] = true
}
}
return true
}

func getCachedInstitutions() (inst []Institution, intError error, extError error) {
if institutionsCache == nil {
return nil, errors.New("institutionsCache isn't initialized"), errors.New("Internal institution cache wasn't initialized")
}
instUrlStr := param.Registry_InstitutionsUrl.GetString()
if instUrlStr == "" {
intError = errors.New("Bad server configuration. Registry.InstitutionsUrl is unset")
extError = errors.New("Bad server configuration. Registry.InstitutionsUrl is unset")
return
}
instUrl, err := url.Parse(instUrlStr)
if err != nil {
intError = errors.Wrap(err, "Bad server configuration. Registry.InstitutionsUrl is invalid")
extError = errors.New("Bad server configuration. Registry.InstitutionsUrl is invalid")
return
}
if !institutionsCache.Has(instUrl.String()) {
log.Info("Cache miss for institutions TTL cache. Will fetch from source.")
client := &http.Client{Transport: config.GetTransport()}
req, err := http.NewRequest("GET", instUrl.String(), nil)
if err != nil {
intError = errors.Wrap(err, "Error making a request when fetching institution list")
extError = errors.New("Error when creating a request to fetch institution from remote url.")
return
}
res, err := client.Do(req)
if err != nil {
intError = errors.Wrap(err, "Error response when fetching institution list")
extError = errors.New("Error from response when fetching institution from remote url.")
return
}
if res.StatusCode != 200 {
intError = errors.Wrap(err, fmt.Sprintf("Error response when fetching institution list with code %d", res.StatusCode))
extError = errors.New(fmt.Sprint("Error when fetching institution from remote url, remote server error with code: ", res.StatusCode))
return
}
resBody, err := io.ReadAll(res.Body)
if err != nil {
intError = errors.Wrap(err, "Error reading response body when fetching institution list")
extError = errors.New("Error read response when fetching institution from remote url.")
return
}
institutions := []Institution{}
if err = json.Unmarshal(resBody, &institutions); err != nil {
intError = errors.Wrap(err, "Error parsing response body when fetching institution list")
extError = errors.New("Error parsing response when fetching institution from remote url.")
return
}
institutionsCacheMutex.Lock()
defer institutionsCacheMutex.Unlock()
institutionsCache.Set(instUrl.String(), institutions, ttlcache.DefaultTTL)
return institutions, nil, nil
} else {
institutionsCacheMutex.RLock()
defer institutionsCacheMutex.RUnlock()
institutions := institutionsCache.Get(instUrl.String())
if institutions.Value() == nil {
intError = errors.New(fmt.Sprint("Fail to get institutions from internal TTL cache, value is nil from key: ", instUrl))
extError = errors.New("Fail to get institutions from internal TTL cache")
return
}
if institutions.IsExpired() {
intError = errors.New(fmt.Sprintf("Cached institution with key %q is expired at %v", institutions.Key(), institutions.ExpiresAt()))
extError = errors.New("Expired institution cache")
return
}
return institutions.Value(), nil, nil
}
}

// List all namespaces in the registry.
// For authenticated users, it returns all namespaces.
// For non-authenticated users, it returns namespaces with AdminMetadata.Status = Approved
Expand Down Expand Up @@ -522,6 +613,22 @@ func getNamespaceJWKS(ctx *gin.Context) {
}

func listInstitutions(ctx *gin.Context) {
// When Registry.InstitutionsUrl is set and Registry.Institutions is unset
if institutionsCache != nil {
insts, intErr, extErr := getCachedInstitutions()
if intErr != nil || extErr != nil {
if intErr != nil {
log.Error(intErr)
}
if extErr != nil {
ctx.JSON(http.StatusInternalServerError, gin.H{"error": extErr.Error()})
}
return
}
ctx.JSON(http.StatusOK, insts)
return
}
// When Registry.Institutions is set
institutions := []Institution{}
if err := param.Registry_Institutions.Unmarshal(&institutions); err != nil {
log.Error("Fail to read server configuration of institutions", err)
Expand Down Expand Up @@ -575,3 +682,67 @@ func RegisterRegistryWebAPI(router *gin.RouterGroup) error {
}
return nil
}

// Initialize institutions list
func InitInstConfig(ctx context.Context, egrp *errgroup.Group) error {
institutions := []Institution{}
if err := param.Registry_Institutions.Unmarshal(&institutions); err != nil {
log.Error("Fail to read Registry.Institutions. Make sure you had the correct format", err)
return errors.Wrap(err, "Fail to read Registry.Institutions. Make sure you had the correct format")
}

if param.Registry_InstitutionsUrl.GetString() != "" {
// Read from Registry.Institutions if Registry.InstitutionsUrl is empty
// or Registry.Institutions and Registry.InstitutionsUrl are both set
if len(institutions) > 0 {
log.Warning("Registry.Institutions and Registry.InstitutionsUrl are both set. Registry.InstitutionsUrl is ignored")
if !checkUniqueInstitutions(institutions) {
return errors.Errorf("Institution IDs read from config are not unique")
}
// return here so that we don't init the institution url cache
return nil
}

_, err := url.Parse(param.Registry_InstitutionsUrl.GetString())
if err != nil {
log.Error("Invalid Registry.InstitutionsUrl: ", err)
return errors.Wrap(err, "Invalid Registry.InstitutionsUrl")
}
instCacheTTL := param.Registry_InstitutionsUrlReloadMinutes.GetDuration()

institutionsCache = ttlcache.New[string, []Institution](ttlcache.WithTTL[string, []Institution](instCacheTTL))

go institutionsCache.Start()

egrp.Go(func() error {
<-ctx.Done()
institutionsCacheMutex.Lock()
defer institutionsCacheMutex.Unlock()
log.Info("Gracefully stopping institution TTL cache eviction...")
if institutionsCache != nil {
institutionsCache.DeleteAll()
institutionsCache.Stop()
} else {
log.Info("Institution TTL cache is nil, stop clean up process.")
}
return nil
})

// Try to populate the cache at the server start. If error occured, it's non-blocking
cachedInsts, intErr, _ := getCachedInstitutions()
if intErr != nil {
log.Warning("Failed to populate institution cache. Error: ", intErr)
} else {
if !checkUniqueInstitutions(cachedInsts) {
return errors.Errorf("Institution IDs read from config are not unique")
}
log.Infof("Successfully populated institution TTL cache with %d entries", len(institutionsCache.Get(institutionsCache.Keys()[0]).Value()))
}
}

if !checkUniqueInstitutions(institutions) {
return errors.Errorf("Institution IDs read from config are not unique")
}
// Else we will read from Registry.Institutions. No extra action needed.
return nil
}
Loading

0 comments on commit cb64873

Please sign in to comment.