Skip to content

Commit b457ba9

Browse files
author
AbstractionFactory
authored
Fixes #285: Unnecessary file upload (#288)
* Fixes #285: Unnecessary file upload This change attempts to fix #285 by only uploading the module/provider index when it has actually changed Signed-off-by: AbstractionFactory <[email protected]> * Review fix Signed-off-by: AbstractionFactory <[email protected]> * Added comments Signed-off-by: AbstractionFactory <[email protected]> --------- Signed-off-by: AbstractionFactory <[email protected]>
1 parent 77976b9 commit b457ba9

File tree

4 files changed

+144
-8
lines changed

4 files changed

+144
-8
lines changed

backend/internal/moduleindex/generator.go

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,13 @@ func (g generator) generate(ctx context.Context, moduleList []module.Addr, block
228228
blocked, blockedReason := g.blocklist.IsModuleBlocked(moduleAddr.Addr)
229229

230230
moduleIndexPath := path.Join(moduleAddr.Namespace, moduleAddr.Name, moduleAddr.TargetSystem, "index.json")
231+
// We are fetching the module entry from the megaindex and storing it
232+
// further down below as a separate index file so the frontend has an easier time
233+
// fetching it.
231234
entry := modules.GetModule(moduleAddr.Addr)
235+
// originalEntry serves the purpose of being an original copy to compare to
236+
// so we don't write the index if it hasn't actually been modified to save costs.
237+
var originalEntry *Module
232238
needsAdd := false
233239
if entry == nil {
234240
entry = &Module{
@@ -239,6 +245,8 @@ func (g generator) generate(ctx context.Context, moduleList []module.Addr, block
239245
BlockedReason: blockedReason,
240246
}
241247
needsAdd = true
248+
} else {
249+
originalEntry = entry.DeepCopy()
242250
}
243251

244252
if entry.IsBlocked != blocked {
@@ -361,12 +369,18 @@ func (g generator) generate(ctx context.Context, moduleList []module.Addr, block
361369
modulesToAdd = append(modulesToAdd, entry)
362370
lock.Unlock()
363371
}
364-
versionListing, err := json.Marshal(entry)
365-
if err != nil {
366-
return fmt.Errorf("failed to marshal module index for %s (%w)", entry.Addr, err)
367-
}
368-
if err := g.storage.WriteFile(ctx, indexstorage.Path(moduleIndexPath), versionListing); err != nil {
369-
return fmt.Errorf("failed to write the module index for %s (%w)", entry.Addr, err)
372+
// Here we compare the module entry to its original copy to make sure
373+
// we are only writing this index if needed. This is needed because writes
374+
// on R2 cost money, whereas reads don't and updating all the provider and
375+
// module indexes on every run costs ~$300 per month.
376+
if originalEntry == nil || !originalEntry.Equals(entry) {
377+
versionListing, err := json.Marshal(entry)
378+
if err != nil {
379+
return fmt.Errorf("failed to marshal module index for %s (%w)", entry.Addr, err)
380+
}
381+
if err := g.storage.WriteFile(ctx, indexstorage.Path(moduleIndexPath), versionListing); err != nil {
382+
return fmt.Errorf("failed to write the module index for %s (%w)", entry.Addr, err)
383+
}
370384
}
371385
}
372386

backend/internal/moduleindex/module.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010

1111
// swagger:model Module
1212
type Module struct {
13+
// If you add a field here, update Equals() and DeepCopy() below.
14+
1315
// required: true
1416
Addr ModuleAddr `json:"addr"`
1517
// required: true
@@ -36,6 +38,43 @@ type Module struct {
3638
UpstreamPopularity int `json:"upstream_popularity"`
3739
// UpstreamForkCount contains the number of forks of the upstream repository.
3840
UpstreamForkCount int `json:"upstream_fork_count"`
41+
42+
// If you add a field here, update Equals() and DeepCopy() below.
43+
}
44+
45+
// Equals compares every parameter of the two modules and returns true if both are equal on a deep comparison.
46+
func (m *Module) Equals(other *Module) bool {
47+
if m == other {
48+
return true
49+
}
50+
if m == nil || other == nil {
51+
return false
52+
}
53+
return m.Addr.Equals(other.Addr.Addr) && m.Description == other.Description &&
54+
slices.Equal(m.Versions, other.Versions) && m.IsBlocked == other.IsBlocked &&
55+
m.BlockedReason == other.BlockedReason && m.Popularity == other.Popularity && m.ForkCount == other.ForkCount &&
56+
m.ForkOfLink == other.ForkOfLink && m.ForkOf.Equals(other.ForkOf.Addr) &&
57+
m.UpstreamPopularity == other.UpstreamPopularity && m.UpstreamForkCount == other.UpstreamForkCount
58+
}
59+
60+
// DeepCopy creates a deep copy of the module, ensuring that all new data structures are independent.
61+
func (m *Module) DeepCopy() *Module {
62+
versions := make([]ModuleVersionDescriptor, len(m.Versions))
63+
copy(versions, m.Versions)
64+
65+
return &Module{
66+
Addr: m.Addr,
67+
Description: m.Description,
68+
Versions: versions,
69+
IsBlocked: m.IsBlocked,
70+
BlockedReason: m.BlockedReason,
71+
Popularity: m.Popularity,
72+
ForkCount: m.ForkCount,
73+
ForkOfLink: m.ForkOfLink,
74+
ForkOf: m.ForkOf,
75+
UpstreamPopularity: m.UpstreamPopularity,
76+
UpstreamForkCount: m.UpstreamForkCount,
77+
}
3978
}
4079

4180
func (m *Module) Validate() error {

backend/internal/providerindex/generator.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,13 @@ func (d *documentationGenerator) scrape(ctx context.Context, providers []provide
195195
eg.Go(func() error {
196196
blocked, blockedReason := d.blocklist.IsProviderBlocked(addr)
197197

198+
// We are fetching the provider entry from the megaindex and storing it
199+
// further down below as a separate index file so the frontend has an easier time
200+
// fetching it.
198201
providerEntry := existingProviders.GetProvider(addr)
202+
// originalProviderEntry serves the purpose of being an original copy to compare to
203+
// so we don't write the index if it hasn't actually been modified to save costs.
204+
var originalProviderEntry *providertypes.Provider
199205
needsAdd := false
200206
if providerEntry == nil {
201207
providerEntry = &providertypes.Provider{
@@ -207,6 +213,8 @@ func (d *documentationGenerator) scrape(ctx context.Context, providers []provide
207213
BlockedReason: blockedReason,
208214
}
209215
needsAdd = true
216+
} else {
217+
originalProviderEntry = providerEntry.DeepCopy()
210218
}
211219

212220
// scrape the docs into their own directory
@@ -223,8 +231,14 @@ func (d *documentationGenerator) scrape(ctx context.Context, providers []provide
223231
return err
224232
}
225233

226-
if err := d.destination.StoreProvider(ctx, *providerEntry); err != nil {
227-
return fmt.Errorf("failed to store provider %s (%w)", addr, err)
234+
// Here we compare the provider entry to its original copy to make sure
235+
// we are only writing this index if needed. This is needed because writes
236+
// on R2 cost money, whereas reads don't and updating all the provider and
237+
// module indexes on every run costs ~$300 per month.
238+
if originalProviderEntry == nil || !originalProviderEntry.Equals(providerEntry) {
239+
if err := d.destination.StoreProvider(ctx, *providerEntry); err != nil {
240+
return fmt.Errorf("failed to store provider %s (%w)", addr, err)
241+
}
228242
}
229243

230244
if needsAdd {

backend/internal/providerindex/providertypes/provider.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
//
1111
// swagger:model Provider
1212
type Provider struct {
13+
// If you add something here, don't forget to update the Equals() and DeepCopy() functions below.
14+
1315
// Addr holds the address of a provider. It can be split by / to obtain a namespace and name.
1416
//
1517
// required: true
@@ -61,6 +63,73 @@ type Provider struct {
6163
IsBlocked bool `json:"is_blocked"`
6264
// required: false
6365
BlockedReason string `json:"blocked_reason,omitempty"`
66+
67+
// If you add something here, don't forget to update the Equals() and DeepCopy() functions below.
68+
}
69+
70+
// Equals returns true if and only of all parameters of the two providers are equal (with a deep comparison).
71+
func (p *Provider) Equals(other *Provider) bool {
72+
if p == other {
73+
return true
74+
}
75+
if p == nil || other == nil {
76+
return false
77+
}
78+
if len(p.ReverseAliases) != len(other.ReverseAliases) {
79+
return false
80+
} else {
81+
for i := range len(p.ReverseAliases) {
82+
if !p.ReverseAliases[i].Equals(other.ReverseAliases[i].Addr) {
83+
return false
84+
}
85+
}
86+
}
87+
return p.Addr.Equals(other.Addr.Addr) && slices.Equal(p.Warnings, other.Warnings) && p.Link == other.Link &&
88+
(p.CanonicalAddr == other.CanonicalAddr || p.CanonicalAddr.Equals(other.CanonicalAddr.Addr)) &&
89+
p.Description == other.Description && p.Popularity == other.Popularity && p.ForkCount == other.ForkCount &&
90+
p.ForkOfLink == other.ForkOfLink && p.ForkOf == other.ForkOf &&
91+
p.UpstreamPopularity == other.UpstreamPopularity && p.UpstreamForkCount == other.UpstreamForkCount &&
92+
slices.Equal(p.Versions, other.Versions) && p.BlockedReason == other.BlockedReason
93+
}
94+
95+
// DeepCopy creates a deep copy of the Provider.
96+
func (p *Provider) DeepCopy() *Provider {
97+
warnings := make([]string, len(p.Warnings))
98+
copy(warnings, p.Warnings)
99+
100+
var canonicalAddr *ProviderAddr
101+
if p.CanonicalAddr != nil {
102+
canonicalAddr = &ProviderAddr{
103+
Addr: p.CanonicalAddr.Addr,
104+
Display: p.CanonicalAddr.Display,
105+
Namespace: p.CanonicalAddr.Namespace,
106+
Name: p.CanonicalAddr.Name,
107+
}
108+
}
109+
110+
reverseAliases := make([]ProviderAddr, len(p.ReverseAliases))
111+
copy(reverseAliases, p.ReverseAliases)
112+
113+
versions := make([]ProviderVersionDescriptor, len(p.Versions))
114+
copy(versions, p.Versions)
115+
116+
return &Provider{
117+
Addr: p.Addr,
118+
Warnings: warnings,
119+
Link: p.Link,
120+
CanonicalAddr: canonicalAddr,
121+
ReverseAliases: reverseAliases,
122+
Description: p.Description,
123+
Popularity: p.Popularity,
124+
ForkCount: p.ForkCount,
125+
ForkOfLink: p.ForkOfLink,
126+
ForkOf: p.ForkOf,
127+
UpstreamPopularity: p.UpstreamPopularity,
128+
UpstreamForkCount: p.UpstreamForkCount,
129+
Versions: versions,
130+
IsBlocked: p.IsBlocked,
131+
BlockedReason: p.BlockedReason,
132+
}
64133
}
65134

66135
func (p *Provider) Compare(other Provider) int {

0 commit comments

Comments
 (0)