Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip: sqlite experiment #5446

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,12 @@ RUN DEBIAN_FRONTEND=noninteractive \
postgresql-14 \
postgresql-client-14 \
postgresql-contrib-14 \
sqlite3 \
git-restore-mtime \
nodejs \
google-cloud-sdk \
google-cloud-sdk-pubsub-emulator \
google-cloud-sdk-gke-gcloud-auth-plugin \
# google-cloud-sdk \
# google-cloud-sdk-pubsub-emulator \
# google-cloud-sdk-gke-gcloud-auth-plugin \
jq \
parallel \
# yugabyte
Expand Down Expand Up @@ -148,6 +149,20 @@ RUN DEBIAN_FRONTEND=noninteractive \
&& apt clean \
&& rm -rf /var/lib/apt/lists/*

# Install Google Cloud SDK from the pinned release tarball, then add the
# gcloud components listed in the `components install` step below.
# NOTE(review): openjdk-17-jre-headless is presumably needed by the pubsub
# emulator -- confirm.
RUN DEBIAN_FRONTEND=noninteractive \
apt-get update && \
# an env-var prefix only applies to the one command it precedes, so repeat it
DEBIAN_FRONTEND=noninteractive \
apt-get install -y openjdk-17-jre-headless && \
# map TARGETARCH onto the tarball's arch suffix; fail loudly on anything else
# instead of requesting a malformed URL
case "${TARGETARCH}" in \
amd64) GCLOUD_ARCH="x86_64" ;; \
arm64) GCLOUD_ARCH="arm" ;; \
*) echo "unsupported TARGETARCH: '${TARGETARCH}'" >&2; exit 1 ;; \
esac && \
GCLOUD_TARBALL="google-cloud-cli-455.0.0-linux-${GCLOUD_ARCH}.tar.gz" && \
# -f: fail on an HTTP error rather than saving the error page as the tarball
curl -fSLO "https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/${GCLOUD_TARBALL}" && \
tar -xf "${GCLOUD_TARBALL}" && \
./google-cloud-sdk/install.sh --quiet --install-python false --usage-reporting false && \
rm "${GCLOUD_TARBALL}" && \
sudo mv google-cloud-sdk /usr/local/google-cloud-sdk && \
sudo ln -s /usr/local/google-cloud-sdk/bin/gcloud /usr/local/bin/gcloud && \
sudo ln -s /usr/local/google-cloud-sdk/bin/gsutil /usr/local/bin/gsutil && \
sudo gcloud components install beta gke-gcloud-auth-plugin pubsub-emulator --quiet && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# As of Ubuntu 24.04, an install includes
# an 'ubuntu' user, that we don't use,
Expand Down
213 changes: 213 additions & 0 deletions backend/migrations/20251001_000000_migrationsSqlite.sqlite3
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
-- One row per migration: its name, an execution date, and the SQL text.
CREATE TABLE IF NOT EXISTS
system_migrations_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( name TEXT PRIMARY KEY NOT NULL
, execution_date TEXT NOT NULL -- timestamp
, sql TEXT NOT NULL
);


-- Accounts: currently just an id and a creation time.
CREATE TABLE IF NOT EXISTS
accounts_v0
-- TODO include name
-- and update references (i.e. in package_types) to be based on id
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL -- UUID stored as text
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

--------------------
-- Stuff that belongs in "package space"
--------------------
-- Package types, addressed by owner / modules / name, with the definition
-- stored as a serialized blob.
CREATE TABLE IF NOT EXISTS
package_types_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL -- UUID stored as text
, owner TEXT NOT NULL -- e.g. Darklang
, modules TEXT NOT NULL -- e.g. Twitter.Other
, name TEXT NOT NULL -- e.g. TextMetadata
, definition BLOB NOT NULL -- the whole thing serialized as binary, in ProgramTypes form
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
);


-- Package constants, addressed by owner / modules / name, with the definition
-- stored as a serialized blob.
CREATE TABLE IF NOT EXISTS
package_constants_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL
, owner TEXT NOT NULL -- e.g. Darklang
, modules TEXT NOT NULL -- e.g. Math.Geometry
, name TEXT NOT NULL -- e.g. pi
, definition BLOB NOT NULL -- the whole thing serialized as binary, in ProgramTypes form
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

-- Package functions, addressed by owner / modules / name, with the definition
-- stored as a serialized blob.
CREATE TABLE IF NOT EXISTS
package_functions_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL
, owner TEXT NOT NULL -- e.g. Darklang
, modules TEXT NOT NULL -- e.g. Twitter.Other
, name TEXT NOT NULL -- e.g. sendText
, definition BLOB NOT NULL -- the whole thing serialized as binary, in ProgramTypes form
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

--------------------
-- Stuff that belongs in "user space"
--------------------
-- Canvases; each row references the account in accounts_v0 via account_id.
CREATE TABLE IF NOT EXISTS
canvases_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL
, account_id TEXT NOT NULL
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
-- NOTE: SQLite only enforces this when foreign keys are enabled on the
-- connection (PRAGMA foreign_keys = ON); otherwise it is parsed but not checked
, FOREIGN KEY(account_id) REFERENCES accounts_v0(id)
);

-- User K/V DBs
-- One row per stored value; unique per
-- (canvas_id, table_tlid, dark_version, user_version, key).
CREATE TABLE IF NOT EXISTS
user_data_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL
, canvas_id TEXT NOT NULL
, table_tlid INTEGER NOT NULL
, user_version INTEGER NOT NULL
, dark_version INTEGER NOT NULL
, data TEXT NOT NULL -- JSON stored as text
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
, updated_at TEXT NOT NULL DEFAULT (datetime('now'))
, key TEXT NOT NULL
, UNIQUE (canvas_id, table_tlid, dark_version, user_version, key)
);

-- Secondary indexes on user_data_v0.

-- Leads with canvas_id, then table_tlid, then the two version columns.
CREATE INDEX IF NOT EXISTS
idx_user_data_fetch
ON user_data_v0
(canvas_id, table_tlid, user_version, dark_version);

-- Same four columns as idx_user_data_fetch, but leading with the versions.
CREATE INDEX IF NOT EXISTS
idx_user_data_current_data_for_tlid
ON user_data_v0
(user_version, dark_version, canvas_id, table_tlid);

-- No GIN index equivalent in SQLite, so this is an ordinary index over the
-- whole `data` text value.
-- NOTE(review): an ordinary index on the full JSON text presumably only helps
-- comparisons against the entire value, not lookups inside the JSON -- confirm
-- it is worth keeping.
CREATE INDEX IF NOT EXISTS
idx_user_data_json
ON user_data_v0
(data);


-- HTTP Handlers
-- Maps a domain to a canvas_id.
CREATE TABLE IF NOT EXISTS
domains_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( domain TEXT PRIMARY KEY NOT NULL
, canvas_id TEXT NOT NULL
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- TODO: extract out table of http handlers from toplevels_v0


-- CRONs
-- One row per recorded cron run: the tlid, its canvas, and when it ran.
CREATE TABLE IF NOT EXISTS
cron_records_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL
, tlid INTEGER NOT NULL
, canvas_id TEXT NOT NULL
, ran_at TEXT NOT NULL DEFAULT (datetime('now')) -- default as it's cheap
);

-- Index on (tlid, canvas_id), with id in descending order as the last column.
-- NOTE(review): presumably used to find the latest record for a cron; id is
-- TEXT in this schema, so "descending id" is a text ordering -- confirm that
-- matches the intended "most recent" semantics.
CREATE INDEX IF NOT EXISTS
idx_cron_records_tlid_canvas_id_id
ON cron_records_v0
(tlid, canvas_id, id DESC);

-- Queues/Workers

-- Instead of ENUM, use CHECK constraint
-- Scheduling rules; rule_type is restricted to 'pause' or 'block' by a CHECK
-- constraint (standing in for an ENUM).
CREATE TABLE IF NOT EXISTS
scheduling_rules_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL
, rule_type TEXT NOT NULL CHECK (rule_type IN ('pause', 'block'))
, canvas_id TEXT NOT NULL
, handler_name TEXT NOT NULL
, event_space TEXT NOT NULL
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

-- Queued events. locked_at is the only nullable column; enqueued_at has no
-- default, so it must be supplied on insert.
CREATE TABLE IF NOT EXISTS
queue_events_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL
, canvas_id TEXT NOT NULL
, module TEXT NOT NULL
, name TEXT NOT NULL
, modifier TEXT NOT NULL
, locked_at TEXT -- nullable
, enqueued_at TEXT NOT NULL
, value TEXT NOT NULL
);


-- We want to use this index to:
-- 1) count the number of items in this queue, so it's important that the entire
-- search term is in the index or it will need to hit disk. This is true even though
-- the module rarely changes
-- 2) fetch the indexes for all items we're unpausing. This is rare so it's fine to
-- do extra work beyond the index for that case.
-- NOTE(review): the sentence above was cut off after "fine to"; the ending is a
-- best guess -- confirm the intended wording.
CREATE INDEX IF NOT EXISTS
idx_queue_events_count
ON queue_events_v0
(canvas_id, module, name);


-- Secrets
-- One row per (canvas_id, name, version); every column is NOT NULL.
CREATE TABLE IF NOT EXISTS
secrets_v0
( canvas_id TEXT NOT NULL
, name TEXT NOT NULL
, value TEXT NOT NULL
, version INTEGER NOT NULL
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
, PRIMARY KEY (canvas_id, name, version) -- TODO: simplify PK
);


-- Top-levels
-- One row per (canvas_id, tlid). `tipe` is restricted to 'db' or 'handler';
-- name / module / modifier are nullable and only apply to handlers.
CREATE TABLE IF NOT EXISTS
toplevels_v0
( canvas_id TEXT NOT NULL
, tlid INTEGER NOT NULL
-- NOTE: SQLite does not enforce the declared length of CHAR(32)
, digest CHAR(32) NOT NULL
, tipe TEXT NOT NULL CHECK (tipe IN ('db', 'handler'))
, name TEXT /* handlers only - used for http lookups */
, module TEXT /* handlers only */
, modifier TEXT /* handlers only */
, updated_at TEXT NOT NULL DEFAULT (datetime('now'))
, created_at TEXT NOT NULL DEFAULT (datetime('now'))
-- boolean flag stored as an integer, limited to 0 or 1
, deleted INTEGER NOT NULL CHECK (deleted IN (0,1))
, data BLOB NOT NULL
, PRIMARY KEY (canvas_id, tlid)
);

-- Traces
-- Trace records: a trace id, its canvas, the root TLID, and the TLIDs called.
CREATE TABLE IF NOT EXISTS
traces_v0
-- explicit NOT NULL: in an ordinary (rowid) table SQLite otherwise accepts
-- NULL in a non-INTEGER PRIMARY KEY column
( id TEXT PRIMARY KEY NOT NULL
, trace_id TEXT NOT NULL -- why do we need this _and_ `id`?
, canvas_id TEXT NOT NULL
-- the handler's (or for a function's default trace, the function's) TLID
-- (used to store the trace data in Cloud Storage)
-- TODO consider using a different mechanism here - fns might not have tlids...
-- why wouldn't we use the `id` instead? length?
, root_tlid INTEGER NOT NULL
, callgraph_tlids TEXT NOT NULL -- functions called during the trace
);



-- differences from PostgreSQL:
-- 1. No TIMESTAMPTZ - using TEXT for timestamps
-- 2. No ENUM types - using TEXT with CHECK constraints
-- CREATE TYPE scheduling_rule_type AS ENUM ('pause', 'block'); - CREATE TABLE scheduling_rules_v0 (rule_type TEXT NOT NULL CHECK (rule_type IN ('pause', 'block')))
-- 3. No JSONB - using TEXT for JSON
-- 4. No UUID type - using TEXT
-- 5. No GIN index
-- 6. Arrays stored as JSON strings
-- BIGINT[] - TEXT
-- 7. No Now() - datetime('now')
-- 8. No BYTEA - using BLOB
-- 9. Inline column REFERENCES accounts_v0(id) - table-level FOREIGN KEY(account_id) REFERENCES accounts_v0(id)
-- 10. BIGINT is INTEGER
-- 11. INT is INTEGER
-- 12. Named constraint dropped: CONSTRAINT user_data_key_uniq UNIQUE (canvas_id, table_tlid, dark_version, user_version, key) - UNIQUE (canvas_id, table_tlid, dark_version, user_version, key)
1 change: 1 addition & 0 deletions backend/paket.dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ nuget OpenTelemetry.Instrumentation.Http = 1.8.1
nuget Npgsql.FSharp = 5.7.0
nuget NpgsqlYugabyteDB = 8.0.3-yb-1
nuget Npgsql.NodaTime = 8.0.3
nuget Microsoft.Data.Sqlite = 9.0.0
nuget Sodium.Core = 1.3.4
nuget Google.Cloud.PubSub.V1 = 3.8.0
nuget Google.Cloud.Storage.V1 = 4.7.0
Expand Down
14 changes: 14 additions & 0 deletions backend/paket.lock
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,12 @@ NUGET
Microsoft.AspNetCore.Metadata (8.0.7)
Microsoft.Bcl.AsyncInterfaces (8.0)
Microsoft.CodeAnalysis.NetAnalyzers (8.0)
Microsoft.Data.Sqlite (9.0)
Microsoft.Data.Sqlite.Core (>= 9.0)
SQLitePCLRaw.bundle_e_sqlite3 (>= 2.1.10)
SQLitePCLRaw.core (>= 2.1.10)
Microsoft.Data.Sqlite.Core (9.0.1)
SQLitePCLRaw.core (>= 2.1.10)
Microsoft.Extensions.Configuration (8.0)
Microsoft.Extensions.Configuration.Abstractions (>= 8.0)
Microsoft.Extensions.Primitives (>= 8.0)
Expand Down Expand Up @@ -304,6 +310,14 @@ NUGET
System.Memory (>= 4.5.5)
Sodium.Core (1.3.4)
libsodium (>= 1.0.19)
SQLitePCLRaw.bundle_e_sqlite3 (2.1.10)
SQLitePCLRaw.lib.e_sqlite3 (>= 2.1.10)
SQLitePCLRaw.provider.e_sqlite3 (>= 2.1.10)
SQLitePCLRaw.core (2.1.10)
System.Memory (>= 4.5.3)
SQLitePCLRaw.lib.e_sqlite3 (2.1.10)
SQLitePCLRaw.provider.e_sqlite3 (2.1.10)
SQLitePCLRaw.core (>= 2.1.10)
System.Buffers (4.5.1)
System.CodeDom (8.0)
System.Collections.Immutable (8.0)
Expand Down
48 changes: 30 additions & 18 deletions backend/src/BuiltinCliHost/Libs/Cli.fs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
module BuiltinCliHost.Libs.Cli

open System.Threading.Tasks
open FSharp.Control.Tasks


open Prelude
open LibExecution.RuntimeTypes
Expand All @@ -28,24 +30,34 @@ module ExecutionError =
let typeRef = TCustomType(Ok fqTypeName, [])


// TODO: de-dupe with _other_ Cli.fs
let pmBaseUrl =
match
System.Environment.GetEnvironmentVariable "DARK_CONFIG_PACKAGE_MANAGER_BASE_URL"
with
| null -> "https://packages.darklang.com"
| var -> var
let packageManagerRT = LibPackageManager.PackageManager.rt pmBaseUrl
let packageManagerPT = LibPackageManager.PackageManager.pt pmBaseUrl
module Config =
let pmBaseUrl =
match
System.Environment.GetEnvironmentVariable
"DARK_CONFIG_PACKAGE_MANAGER_BASE_URL"
with
| null -> "https://packages.darklang.com"
| var -> var

// Builds the `rt` and `pt` package managers for `pmBaseUrl`, awaits the `rt`
// manager's `init`, and returns both as a tuple.
let initializePackageManagers () =
task {
let packageManagerRT = LibPackageManager.PackageManager.rt pmBaseUrl
let packageManagerPT = LibPackageManager.PackageManager.pt pmBaseUrl

// only the `rt` manager is initialised here
do! packageManagerRT.init

return packageManagerRT, packageManagerPT
}

// `.Result` blocks the calling thread until the task above has completed
let packageManagerRT, packageManagerPT = initializePackageManagers().Result

let builtinsToUse : RT.Builtins =
LibExecution.Builtin.combine
[ BuiltinExecution.Builtin.builtins
BuiltinExecution.Libs.HttpClient.defaultConfig
packageManagerPT
BuiltinCli.Builtin.builtins ]
[]
let builtinsToUse : RT.Builtins =
LibExecution.Builtin.combine
[ BuiltinExecution.Builtin.builtins
BuiltinExecution.Libs.HttpClient.defaultConfig
packageManagerPT
BuiltinCli.Builtin.builtins ]
[]


let execute
Expand Down Expand Up @@ -76,13 +88,13 @@ let execute
mod'.submodules.fns |> List.map PT2RT.PackageFn.toRT ]

let packageManager =
packageManagerRT |> PackageManager.withExtras types constants fns
Config.packageManagerRT |> PackageManager.withExtras types constants fns

let tracing = Exe.noTracing

let state =
Exe.createState
builtinsToUse
Config.builtinsToUse
packageManager
tracing
parentState.reportException
Expand Down
1 change: 1 addition & 0 deletions backend/src/Cli/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
lib
Loading