Skip to content

Commit 645c420

Browse files
committed
Allow syncing large PG databases with transaction ID wraparounds
1 parent b8a188a commit 645c420

10 files changed

Lines changed: 342 additions & 93 deletions

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ benchmark:
4646
devbox run "time psql postgres://127.0.0.1:54321/bemidb < ./benchmark/queries.sql"
4747

4848
pg-init:
49-
devbox run initdb &&
49+
devbox run initdb && \
5050
sed -i "s/#log_statement = 'none'/log_statement = 'all'/g" ./.devbox/virtenv/postgresql/data/postgresql.conf && \
5151
sed -i "s/#logging_collector = off/logging_collector = on/g" ./.devbox/virtenv/postgresql/data/postgresql.conf && \
5252
sed -i "s/#log_directory = 'log'/log_directory = 'log'/g" ./.devbox/virtenv/postgresql/data/postgresql.conf

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ It consists of a single binary that seamlessly connects to a Postgres database,
3434
- **Run complex analytical queries like it's your Postgres database**. Without worrying about performance impact and indexing.
3535
- **Simplify your data stack down to a single binary**. No complex setup, no data movement, no weird acronyms like CDC, ETL, DW.
3636
- **Integrate with Postgres-compatible tools and services**. Querying and visualizing data with BI tools, notebooks, and ORMs.
37-
- **Automatically centralize all data in a data lakehouse**. Using Iceberg tables with Parquet data files on object storage.
37+
- **Automatically centralize all data in a data lakehouse**. Using Iceberg tables with Parquet data files in object storage.
3838
- **Continuously archive data from your Postgres database**. Keeping and querying historical data without affecting the main database.
3939

4040
## Quickstart

devbox.lock

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -137,127 +137,127 @@
137137
}
138138
},
139139
"postgresql@latest": {
140-
"last_modified": "2024-10-24T16:50:28Z",
140+
"last_modified": "2025-03-25T17:32:05Z",
141141
"plugin_version": "0.0.2",
142-
"resolved": "github:NixOS/nixpkgs/63487b2f26fa065cfeeaa47dddb08e2856ba53e8#postgresql",
142+
"resolved": "github:NixOS/nixpkgs/25d1b84f5c90632a623c48d83a2faf156451e6b1#postgresql",
143143
"source": "devbox-search",
144-
"version": "16.4",
144+
"version": "17.4",
145145
"systems": {
146146
"aarch64-darwin": {
147147
"outputs": [
148148
{
149149
"name": "out",
150-
"path": "/nix/store/6dzxj78wph840cpwslh96s4gpm0iwch2-postgresql-16.4",
150+
"path": "/nix/store/prh52g9iwjdddxbv4n0b52gbnlxnnk6w-postgresql-17.4",
151151
"default": true
152152
},
153153
{
154154
"name": "man",
155-
"path": "/nix/store/z1n2vh799a5icpaxbrjfqsasagb276bk-postgresql-16.4-man",
155+
"path": "/nix/store/il144892arv36x68b5y95bkvrq32ym91-postgresql-17.4-man",
156156
"default": true
157157
},
158158
{
159159
"name": "dev",
160-
"path": "/nix/store/afjpl8ilq8s6j6zh4qqyy6mxz3v2xbav-postgresql-16.4-dev"
160+
"path": "/nix/store/7gf8hy13r7li2balcini7004aml54l5n-postgresql-17.4-dev"
161161
},
162162
{
163163
"name": "doc",
164-
"path": "/nix/store/ry9d9by692xj92y5b9j6z0aa5y3lh3px-postgresql-16.4-doc"
164+
"path": "/nix/store/6x00505hxzfwjfpk15v6p4qqnbpk5dza-postgresql-17.4-doc"
165165
},
166166
{
167167
"name": "lib",
168-
"path": "/nix/store/d1im42w02x8gl2y380r4hgj8xgkkkbwc-postgresql-16.4-lib"
168+
"path": "/nix/store/c9g6v34cjsf308m9xzcs7figc1vgbbw3-postgresql-17.4-lib"
169169
}
170170
],
171-
"store_path": "/nix/store/6dzxj78wph840cpwslh96s4gpm0iwch2-postgresql-16.4"
171+
"store_path": "/nix/store/prh52g9iwjdddxbv4n0b52gbnlxnnk6w-postgresql-17.4"
172172
},
173173
"aarch64-linux": {
174174
"outputs": [
175175
{
176176
"name": "out",
177-
"path": "/nix/store/37r0vmsb8xd1kv3wjd99kr59q99ja3g0-postgresql-16.4",
177+
"path": "/nix/store/1lgjdy1nm8l68y2jw6m1lhas4j5jcmk1-postgresql-17.4",
178178
"default": true
179179
},
180180
{
181181
"name": "man",
182-
"path": "/nix/store/mawnv85hv5y64csbmpgrnz88j7r8cby5-postgresql-16.4-man",
182+
"path": "/nix/store/1v352rrzfv5p105jfaizxhd29nk41hgp-postgresql-17.4-man",
183183
"default": true
184184
},
185185
{
186186
"name": "debug",
187-
"path": "/nix/store/71hz4hv1n6ivymbzd0jm3a61cyj9fwh5-postgresql-16.4-debug"
187+
"path": "/nix/store/5bywayb6ywgznzh9cck9wpya1bzg4v0a-postgresql-17.4-debug"
188188
},
189189
{
190190
"name": "dev",
191-
"path": "/nix/store/ibhwvhq4gkdibkfrkqg9vmip9mhhrg2q-postgresql-16.4-dev"
191+
"path": "/nix/store/zs35b02p7cay6jp7zr1xihwx8vzab17c-postgresql-17.4-dev"
192192
},
193193
{
194194
"name": "doc",
195-
"path": "/nix/store/rmvkab0pxjjjznk350syr3gzpa13dz1k-postgresql-16.4-doc"
195+
"path": "/nix/store/1qccl3dm5wcja6h2kjkhvs5r9l1bx4hz-postgresql-17.4-doc"
196196
},
197197
{
198198
"name": "lib",
199-
"path": "/nix/store/39mnmp40qhpq2h6r3cj66s23sb5fkzr6-postgresql-16.4-lib"
199+
"path": "/nix/store/kyh4l6wsjgwghvjw9810p7nn1ap106mj-postgresql-17.4-lib"
200200
}
201201
],
202-
"store_path": "/nix/store/37r0vmsb8xd1kv3wjd99kr59q99ja3g0-postgresql-16.4"
202+
"store_path": "/nix/store/1lgjdy1nm8l68y2jw6m1lhas4j5jcmk1-postgresql-17.4"
203203
},
204204
"x86_64-darwin": {
205205
"outputs": [
206206
{
207207
"name": "out",
208-
"path": "/nix/store/vlgydd1rakmw9j14i8dgrlhzj4pa82vi-postgresql-16.4",
208+
"path": "/nix/store/0mzgv54qxafr66f4d7prz42fhs833mhk-postgresql-17.4",
209209
"default": true
210210
},
211211
{
212212
"name": "man",
213-
"path": "/nix/store/2wm2caki07a557z97228n2zxrd3a8j4b-postgresql-16.4-man",
213+
"path": "/nix/store/akb98lb29c1x3mflzcwqy4a0gqfk331r-postgresql-17.4-man",
214214
"default": true
215215
},
216216
{
217-
"name": "doc",
218-
"path": "/nix/store/r03r96a44grl85sflw6hvwwlrzr32rk9-postgresql-16.4-doc"
217+
"name": "dev",
218+
"path": "/nix/store/gpkbg9yhx7jji2hr3jp89q06hi6v7qrk-postgresql-17.4-dev"
219219
},
220220
{
221-
"name": "lib",
222-
"path": "/nix/store/cy3q9y20jwk1vkd6jxf3mnq6xzbb9dn8-postgresql-16.4-lib"
221+
"name": "doc",
222+
"path": "/nix/store/9hxw6pf1qnlz1ygx5ximyvc48swb54n0-postgresql-17.4-doc"
223223
},
224224
{
225-
"name": "dev",
226-
"path": "/nix/store/96nxx00m06jl2jmvb16916l2rpwb13hk-postgresql-16.4-dev"
225+
"name": "lib",
226+
"path": "/nix/store/kyml5v1q498ympq67jvcnhgmsn8384zk-postgresql-17.4-lib"
227227
}
228228
],
229-
"store_path": "/nix/store/vlgydd1rakmw9j14i8dgrlhzj4pa82vi-postgresql-16.4"
229+
"store_path": "/nix/store/0mzgv54qxafr66f4d7prz42fhs833mhk-postgresql-17.4"
230230
},
231231
"x86_64-linux": {
232232
"outputs": [
233233
{
234234
"name": "out",
235-
"path": "/nix/store/mjjfx6yyaaba5hmv6bga20m8fxrca93l-postgresql-16.4",
235+
"path": "/nix/store/snfxmriwav4i0k1fxp78xk5w12hbv4q9-postgresql-17.4",
236236
"default": true
237237
},
238238
{
239239
"name": "man",
240-
"path": "/nix/store/b8cvsw47h2487y4j805zi0645x3ajh1i-postgresql-16.4-man",
240+
"path": "/nix/store/pcx190vq4awjcgpmj2flrbp9awhdc74q-postgresql-17.4-man",
241241
"default": true
242242
},
243-
{
244-
"name": "doc",
245-
"path": "/nix/store/apbxfs52v8im9725mn2f1jhgbdfggrpd-postgresql-16.4-doc"
246-
},
247243
{
248244
"name": "lib",
249-
"path": "/nix/store/32cprs7xwxvb0rw2imfrgy5vcacc27hc-postgresql-16.4-lib"
245+
"path": "/nix/store/yja4rgfrwyxckwqf10rbr4armbn0p2y5-postgresql-17.4-lib"
250246
},
251247
{
252248
"name": "debug",
253-
"path": "/nix/store/alcnsd7fkkr3iipvcn9gzsyv16kab6m9-postgresql-16.4-debug"
249+
"path": "/nix/store/zrlrz84kzfvnxcx5mis53scr205p29hx-postgresql-17.4-debug"
254250
},
255251
{
256252
"name": "dev",
257-
"path": "/nix/store/pqya8lq5jyplfmbmafrrwsrsi07d5ssn-postgresql-16.4-dev"
253+
"path": "/nix/store/piqzr58swwmbsngl3jp98xgrf17a960n-postgresql-17.4-dev"
254+
},
255+
{
256+
"name": "doc",
257+
"path": "/nix/store/ilc5sycwvqjjfa33978nb1p14x358l1c-postgresql-17.4-doc"
258258
}
259259
],
260-
"store_path": "/nix/store/mjjfx6yyaaba5hmv6bga20m8fxrca93l-postgresql-16.4"
260+
"store_path": "/nix/store/snfxmriwav4i0k1fxp78xk5w12hbv4q9-postgresql-17.4"
261261
}
262262
}
263263
}

src/config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
)
99

1010
const (
11-
VERSION = "0.48.0"
11+
VERSION = "0.49.0"
1212

1313
ENV_PORT = "BEMIDB_PORT"
1414
ENV_DATABASE = "BEMIDB_DATABASE"

src/iceberg_writer_table.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ func (writer *IcebergWriterTable) Write(loadRows func() ([][]string, InternalTab
7878
firstNewParquetFile = newParquetFile
7979
}
8080

81-
if writer.continuedRefresh && (newInternalTableMetadata.LastRefreshMode == RefreshModeIncremental || newInternalTableMetadata.LastRefreshMode == RefreshModeIncrementalInProgress) {
81+
if writer.continuedRefresh {
8282
var overwrittenManifestListFilesSortedAsc []ManifestListFile
8383

8484
existingManifestListItemsSortedDesc, overwrittenManifestListFilesSortedAsc, lastSequenceNumber = writer.overwriteExistingFiles(
@@ -111,7 +111,7 @@ func (writer *IcebergWriterTable) Write(loadRows func() ([][]string, InternalTab
111111
err = writer.storage.WriteInternalTableMetadata(metadataDirPath, newInternalTableMetadata)
112112
PanicIfError(writer.config, err)
113113

114-
loadMoreRows = newInternalTableMetadata.InProgress()
114+
loadMoreRows = newInternalTableMetadata.IsInProgress()
115115
LogDebug(writer.config, "Written", newParquetCount, "Parquet file(s). Load more rows:", loadMoreRows)
116116
}
117117
}

src/query_handler_test.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package main
33
import (
44
"encoding/binary"
55
"reflect"
6-
"strconv"
76
"strings"
87
"testing"
98

@@ -1677,7 +1676,3 @@ func testCommandCompleteTag(t *testing.T, message pgproto3.Message, expectedTag
16771676
t.Errorf("Expected the command tag to be %v, got %v", expectedTag, string(commandComplete.CommandTag))
16781677
}
16791678
}
1680-
1681-
func Uint32ToString(i uint32) string {
1682-
return strconv.FormatUint(uint64(i), 10)
1683-
}

src/storage_interface.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,28 +64,33 @@ type MetadataFile struct {
6464
}
6565

6666
type InternalTableMetadata struct {
67-
LastSyncedAt int64 `json:"last-synced-at"`
6867
LastRefreshMode RefreshMode `json:"last-refresh-mode"`
68+
LastSyncedAt int64 `json:"last-synced-at"`
69+
LastTxid int64 `json:"last-txid"`
6970
MaxXmin *uint32 `json:"max-xmin"`
7071
}
7172

72-
func (internalTableMetadata InternalTableMetadata) InProgress() bool {
73+
func (internalTableMetadata InternalTableMetadata) IsInProgress() bool {
7374
return internalTableMetadata.LastRefreshMode == RefreshModeIncrementalInProgress || internalTableMetadata.LastRefreshMode == RefreshModeFullInProgress
7475
}
7576

7677
func (internalTableMetadata InternalTableMetadata) MaxXminString() string {
7778
if internalTableMetadata.MaxXmin == nil {
78-
return "null"
79+
panic("MaxXmin is unexpectedly null. " + internalTableMetadata.String())
7980
}
80-
return fmt.Sprint(*internalTableMetadata.MaxXmin)
81+
return Uint32ToString(*internalTableMetadata.MaxXmin)
82+
}
83+
84+
func (internalTableMetadata InternalTableMetadata) LastWrappedAroundTxidString() string {
85+
return Int64ToString(PgWraparoundTxid(internalTableMetadata.LastTxid))
8186
}
8287

8388
func (internalTableMetadata InternalTableMetadata) String() string {
8489
return fmt.Sprintf(
85-
"LastSyncedAt: %d, LastRefreshMode: %s, MaxXmin: %s",
86-
internalTableMetadata.LastSyncedAt,
90+
"LastRefreshMode: %s, LastSyncedAt: %d, MaxXmin: %d",
8791
internalTableMetadata.LastRefreshMode,
88-
internalTableMetadata.MaxXminString(),
92+
internalTableMetadata.LastSyncedAt,
93+
*internalTableMetadata.MaxXmin,
8994
)
9095
}
9196

0 commit comments

Comments
 (0)