diff --git a/db/drop_indices.sql b/db/drop_indices.sql index 1be451d6..a9fe7990 100644 --- a/db/drop_indices.sql +++ b/db/drop_indices.sql @@ -1,7 +1,11 @@ --- Remove indices before bulk insertion -DROP INDEX IF EXISTS data_timestamp_index, - data_timeseries_index, - nonscalar_data_timestamp_index, - nonscalar_data_timeseries_index, - old_flags_obtime_index, - old_flags_timeseries_index; +DO $$ +DECLARE + i RECORD; +BEGIN + FOR i IN (SELECT schemaname, indexname fROM pg_indexes + WHERE schemaname IN ('public', 'flags') + AND NOT indexdef LIKE '%UNIQUE%') + LOOP + EXECUTE format('DROP INDEX IF EXISTS %s.%s', i.schemaname, i.indexname); + END LOOP; +END $$; diff --git a/db/flags.sql b/db/flags.sql index d3d5de2d..4f2ef406 100644 --- a/db/flags.sql +++ b/db/flags.sql @@ -7,20 +7,8 @@ CREATE TABLE IF NOT EXISTS flags.kvdata ( corrected REAL NULL, controlinfo TEXT NULL, useinfo TEXT NULL, - cfailed INT4 NULL, + cfailed TEXT NULL, CONSTRAINT unique_kvdata_timeseries_obstime UNIQUE (timeseries, obstime) ); -CREATE INDEX IF NOT EXISTS kvdata_obtime_index ON flags.kvdata (obstime); -CREATE INDEX IF NOT EXISTS kvdata_timeseries_index ON flags.kvdata USING HASH (timeseries); - -CREATE TABLE IF NOT EXISTS flags.old_databases ( - timeseries INT4 REFERENCES public.timeseries, - obstime TIMESTAMPTZ NOT NULL, - corrected REAL NULL, - controlinfo TEXT NULL, - useinfo TEXT NULL, - cfailed INT4 NULL , - CONSTRAINT unique_old_flags_timeseries_obstime UNIQUE (timeseries, obstime) -); -CREATE INDEX IF NOT EXISTS old_flags_obtime_index ON flags.old_databases (obstime); -CREATE INDEX IF NOT EXISTS old_flags_timeseries_index ON flags.old_databases USING HASH (timeseries); +CREATE INDEX IF NOT EXISTS kvdata_obstime_index ON flags.kvdata (obstime); +CREATE INDEX IF NOT EXISTS kvdata_timeseries_index ON flags.kvdata USING HASH (timeseries); diff --git a/db/partitions_generated.sql b/db/partitions_generated.sql index 070a914d..73aebbb4 100644 --- a/db/partitions_generated.sql +++ b/db/partitions_generated.sql @@ -1,4 +1,6 @@ -- Generated by simple script for testing +CREATE TABLE IF NOT EXISTS data_y1850_to_y1950 PARTITION OF public.data +FOR VALUES FROM ('1850-01-01 00:00:00+00') TO ('1950-01-01 00:00:00+00'); CREATE TABLE IF NOT EXISTS data_y1950_to_y2000 PARTITION OF public.data FOR VALUES FROM ('1950-01-01 00:00:00+00') TO ('2000-01-01 00:00:00+00'); CREATE TABLE IF NOT EXISTS data_y2000_to_y2010 PARTITION OF public.data @@ -35,6 +37,8 @@ CREATE TABLE IF NOT EXISTS data_y2028_to_y2029 PARTITION OF public.data FOR VALUES FROM ('2028-01-01 00:00:00+00') TO ('2029-01-01 00:00:00+00'); CREATE TABLE IF NOT EXISTS data_y2029_to_y2030 PARTITION OF public.data FOR VALUES FROM ('2029-01-01 00:00:00+00') TO ('2030-01-01 00:00:00+00'); +CREATE TABLE IF NOT EXISTS nonscalar_data_y1850_to_y1950 PARTITION OF public.nonscalar_data +FOR VALUES FROM ('1850-01-01 00:00:00+00') TO ('1950-01-01 00:00:00+00'); CREATE TABLE IF NOT EXISTS nonscalar_data_y1950_to_y2000 PARTITION OF public.nonscalar_data FOR VALUES FROM ('1950-01-01 00:00:00+00') TO ('2000-01-01 00:00:00+00'); CREATE TABLE IF NOT EXISTS nonscalar_data_y2000_to_y2010 PARTITION OF public.nonscalar_data diff --git a/ingestion/src/kvkafka.rs b/ingestion/src/kvkafka.rs index 0a99eebf..d5573c26 100644 --- a/ingestion/src/kvkafka.rs +++ b/ingestion/src/kvkafka.rs @@ -109,7 +109,7 @@ pub struct Kvdata { #[serde(default, deserialize_with = "optional")] useinfo: Option, #[serde(default, deserialize_with = "optional")] - cfailed: Option, + cfailed: Option, } // If the field is 
either empty or missing it should deserialize to None. diff --git a/integration_tests/src/main.rs b/integration_tests/src/main.rs index a73a1241..36eece8e 100644 --- a/integration_tests/src/main.rs +++ b/integration_tests/src/main.rs @@ -9,22 +9,6 @@ async fn insert_schema(client: &tokio_postgres::Client, filename: &str) -> Resul client.batch_execute(schema.as_str()).await } -fn format_partition(start: &str, end: &str, table: &str) -> String { - // TODO: add multiple partitions? - format!( - "CREATE TABLE {table}_y{start}_to_y{end} PARTITION OF {table} \ - FOR VALUES FROM ('{start}-01-01 00:00:00+00') TO ('{end}-01-01 00:00:00+00')", - ) -} - -async fn create_data_partitions(client: &tokio_postgres::Client) -> Result<(), Error> { - let scalar_string = format_partition("1950", "2100", "public.data"); - let nonscalar_string = format_partition("1950", "2100", "public.nonscalar_data"); - - client.batch_execute(scalar_string.as_str()).await?; - client.batch_execute(nonscalar_string.as_str()).await -} - #[tokio::main] async fn main() { let (client, connection) = tokio_postgres::connect(CONNECT_STRING, NoTls) .await @@ -38,10 +22,13 @@ async fn main() { }); // NOTE: order matters - let schemas = ["db/public.sql", "db/labels.sql", "db/flags.sql"]; + let schemas = [ + "db/public.sql", + "db/partitions_generated.sql", + "db/labels.sql", + "db/flags.sql", + ]; for schema in schemas { insert_schema(&client, schema).await.unwrap(); } - - create_data_partitions(&client).await.unwrap(); } diff --git a/justfile b/justfile index 9e111a00..f8889de0 100644 --- a/justfile +++ b/justfile @@ -6,20 +6,25 @@ test_all: setup && clean cargo test --workspace --no-fail-fast -- --nocapture --test-threads=1 test_end_to_end: setup && clean - cargo test --test end_to_end --no-fail-fast -- --nocapture --test-threads=1 + -cargo test --test end_to_end --no-fail-fast -- --nocapture --test-threads=1 test_migrations: debug_migrations && clean # Debug commands don't perform the cleanup action after running. # This allows you to manually check the state of the database. + debug_kafka: setup - cargo test --test end_to_end test_kafka --features debug --no-fail-fast -- --nocapture --test-threads=1 + -cargo test --test end_to_end test_kafka --features debug --no-fail-fast -- --nocapture --test-threads=1 debug_test TEST: setup - cargo test {{TEST}} --features debug --no-fail-fast -- --nocapture --test-threads=1 + -cargo test {{TEST}} --features debug --no-fail-fast -- --nocapture --test-threads=1 debug_migrations: setup - @ cd migrations && go test -v ./... + -@ cd migrations && go test -v ./... + +# psql into the container database +psql: + @docker exec -it lard_tests psql -U postgres setup: @ echo "Starting Postgres docker container..." diff --git a/migrations/README.md b/migrations/README.md index bfa03aa5..92a845e8 100644 --- a/migrations/README.md +++ b/migrations/README.md @@ -1,6 +1,6 @@ # Migrations -Go package used to dump tables from old databases (KDVH, Kvalobs) and import them into LARD. +Go package that dumps tables from old databases (KDVH, Kvalobs) and imports them into LARD. ## Usage @@ -10,16 +10,18 @@ Go package used to dump tables from old databases (KDVH, Kvalobs) and import the go build ``` -1. Dump tables from KDVH +1. Dump tables ```terminal ./migrate kdvh dump + ./migrate kvalobs dump ``` 1. Import dumps into LARD ```terminal ./migrate kdvh import + ./migrate kvalobs import ``` For each command, you can use the `--help` flag to see all available options.
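The README above documents the `./migrate kdvh dump` and `./migrate kvalobs dump` subcommands, and the go.mod change below swaps `jessevdk/go-flags` for `alexflint/go-arg`, whose `arg:`-tagged `Config` structs and `Execute()` methods appear later in this patch. A minimal sketch of how such subcommands can be wired up with go-arg; only `dump.Config` and its `Execute()` method come from this patch, the top-level `cli` and `kdvhCmd` structs are illustrative assumptions:

```go
package main

import (
	"os"

	"github.com/alexflint/go-arg"

	"migrate/kdvh/dump"
)

// kdvhCmd groups the KDVH subcommands (hypothetical name, the real
// wiring lives outside this patch).
type kdvhCmd struct {
	Dump *dump.Config `arg:"subcommand:dump" help:"dump tables from KDVH"`
	// Import *kdvhimport.Config `arg:"subcommand:import"` // analogous, omitted here
}

// cli is the top-level argument struct parsed by go-arg.
type cli struct {
	Kdvh *kdvhCmd `arg:"subcommand:kdvh" help:"commands for the KDVH database"`
	// Kvalobs *kvalobsCmd `arg:"subcommand:kvalobs"` // analogous, omitted here
}

func main() {
	var args cli
	parser := arg.MustParse(&args)

	switch {
	case args.Kdvh != nil && args.Kdvh.Dump != nil:
		// Flags like --path, --overwrite, and -n are declared on dump.Config
		args.Kdvh.Dump.Execute()
	default:
		parser.WriteHelp(os.Stdout)
	}
}
```

With this layout the `--help` output mentioned in the README is generated by go-arg from the `help:` tags on each `Config` field.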
diff --git a/migrations/go.mod b/migrations/go.mod index 4153ee93..c53e083b 100644 --- a/migrations/go.mod +++ b/migrations/go.mod @@ -3,15 +3,16 @@ module migrate go 1.22.3 require ( + github.com/alexflint/go-arg v1.5.1 github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1 github.com/jackc/pgx/v5 v5.6.0 - github.com/jessevdk/go-flags v1.6.1 github.com/joho/godotenv v1.5.1 github.com/rickb777/period v1.0.5 github.com/schollz/progressbar/v3 v3.16.1 ) require ( + github.com/alexflint/go-scalar v1.2.0 // indirect github.com/govalues/decimal v0.1.29 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect @@ -25,5 +26,3 @@ require ( golang.org/x/term v0.25.0 // indirect golang.org/x/text v0.16.0 // indirect ) - -replace github.com/jessevdk/go-flags => github.com/Lun4m/go-flags v0.0.0-20241118100134-6375192b7985 diff --git a/migrations/go.sum b/migrations/go.sum index 54140c04..72aadfc2 100644 --- a/migrations/go.sum +++ b/migrations/go.sum @@ -1,7 +1,7 @@ -github.com/Lun4m/go-flags v0.0.0-20241113125827-68757125e949 h1:7xyEGIr1X5alOjBjlNTDF+aRBcRIo60YX5sdlziLE5w= -github.com/Lun4m/go-flags v0.0.0-20241113125827-68757125e949/go.mod h1:42/L0FDbP0qe91I+81tBqjU3uoz1tn1GDMZAhcCE2PE= -github.com/Lun4m/go-flags v0.0.0-20241118100134-6375192b7985 h1:eUA/sFZ1CtY9+9y/fPpUivYW8fJBlXqB4/8CjC+yXqk= -github.com/Lun4m/go-flags v0.0.0-20241118100134-6375192b7985/go.mod h1:42/L0FDbP0qe91I+81tBqjU3uoz1tn1GDMZAhcCE2PE= +github.com/alexflint/go-arg v1.5.1 h1:nBuWUCpuRy0snAG+uIJ6N0UvYxpxA0/ghA/AaHxlT8Y= +github.com/alexflint/go-arg v1.5.1/go.mod h1:A7vTJzvjoaSTypg4biM5uYNTkJ27SkNTArtYXnlqVO8= +github.com/alexflint/go-scalar v1.2.0 h1:WR7JPKkeNpnYIOfHRa7ivM21aWAdHD0gEWHCx+WQBRw= +github.com/alexflint/go-scalar v1.2.0/go.mod h1:LoFvNMqS1CPrMVltza4LvnGKhaSpc3oyLEBUZVhhS2o= github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM= github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -21,8 +21,6 @@ github.com/jackc/pgx/v5 v5.6.0 h1:SWJzexBzPL5jb0GEsrPMLIsi/3jOo7RHlzTjcAeDrPY= github.com/jackc/pgx/v5 v5.6.0/go.mod h1:DNZ/vlrUnhWCoFGxHAG8U2ljioxukquj7utPDgtQdTw= github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk= github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= -github.com/jessevdk/go-flags v1.6.1 h1:Cvu5U8UGrLay1rZfv/zP7iLpSHGUZ/Ou68T0iX1bBK4= -github.com/jessevdk/go-flags v1.6.1/go.mod h1:Mk8T1hIAWpOiJiHa9rJASDK2UGWji0EuPGBnNLMooyc= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= @@ -42,6 +40,7 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc github.com/schollz/progressbar/v3 v3.16.1 h1:RnF1neWZFzLCoGx8yp1yF7SDl4AzNDI5y4I0aUJRrZQ= github.com/schollz/progressbar/v3 v3.16.1/go.mod h1:I2ILR76gz5VXqYMIY/LdLecvMHDPVcQm3W/MSKi1TME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= diff --git a/migrations/kdvh/import/convert_functions.go b/migrations/kdvh/db/convert_functions.go similarity index 54% rename from migrations/kdvh/import/convert_functions.go rename to migrations/kdvh/db/convert_functions.go index c0dcf881..448d809b 100644 --- a/migrations/kdvh/import/convert_functions.go +++ b/migrations/kdvh/db/convert_functions.go @@ -1,108 +1,75 @@ -package port +package db import ( "errors" "strconv" - "time" "github.com/rickb777/period" - "migrate/kdvh/db" - "migrate/kdvh/import/cache" "migrate/lard" ) -// The following ConvertFunctions try to recover the original pair of `controlinfo` -// and `useinfo` generated by Kvalobs for an observation, based on `Obs.Flags` and `Obs.Data` -// Different KDVH tables need different ways to perform this conversion (defined in CONV_MAP). -// -// It returns three structs for each of the lard tables we are inserting into -type ConvertFunction func(KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) - -func getConvertFunc(table *db.Table) ConvertFunction { - switch table.TableName { - case "T_EDATA": - return ConvertEdata - case "T_PDATA": - return ConvertPdata - case "T_NDATA": - return ConvertNdata - case "T_VDATA": - return ConvertVdata - case "T_MONTH", "T_DIURNAL", "T_HOMOGEN_DIURNAL", "T_HOMOGEN_MONTH": - return ConvertProduct - case "T_DIURNAL_INTERPOLATED": - return ConvertDiurnalInterpolated - } - return Convert -} - -type KdvhObs struct { - *cache.TsInfo - obstime time.Time - data string - flags string -} - // Work around to return reference to consts func addr[T any](t T) *T { return &t } -func (obs *KdvhObs) flagsAreValid() bool { - if len(obs.flags) != 5 { +func flagsAreValid(obs *KdvhObs) bool { + if len(obs.Flags) != 5 { return false } - _, err := strconv.ParseInt(obs.flags, 10, 32) + _, err := strconv.ParseInt(obs.Flags, 10, 32) return err == nil } -func (obs *KdvhObs) Useinfo() *string { - if !obs.flagsAreValid() { +func useinfo(obs *KdvhObs) *string { + if !flagsAreValid(obs) { return addr(INVALID_FLAGS) } - return addr(obs.flags + DELAY_DEFAULT) + return addr(obs.Flags + DELAY_DEFAULT) } // Default ConvertFunction // NOTE: this should be the only function that can return `lard.TextObs` with non-null text data. 
-func Convert(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { +func convert(obs *KdvhObs, ts *TsInfo) (lard.DataObs, lard.TextObs, lard.Flag, error) { var valPtr *float32 controlinfo := VALUE_PASSED_QC - if obs.data == "" { + if obs.Data == "" { controlinfo = VALUE_MISSING } - val, err := strconv.ParseFloat(obs.data, 32) + val, err := strconv.ParseFloat(obs.Data, 32) if err == nil { valPtr = addr(float32(val)) } return lard.DataObs{ - Id: obs.Id, - Obstime: obs.obstime, + Id: ts.Id, + Obstime: obs.Obstime, Data: valPtr, }, lard.TextObs{ - Id: obs.Id, - Obstime: obs.obstime, - Text: &obs.data, + Id: ts.Id, + Obstime: obs.Obstime, + Text: &obs.Data, }, lard.Flag{ - Id: obs.Id, - Obstime: obs.obstime, - Useinfo: obs.Useinfo(), + Id: ts.Id, + Obstime: obs.Obstime, + Original: valPtr, + Corrected: valPtr, Controlinfo: &controlinfo, + Useinfo: useinfo(obs), }, nil } // This function modifies obstimes to always use totime // This is needed because KDVH used incorrect and incosistent timestamps -func ConvertProduct(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { - data, text, flag, err := Convert(obs) - if !obs.Offset.IsZero() { - if temp, ok := obs.Offset.AddTo(data.Obstime); ok { +func convertProduct(obs *KdvhObs, ts *TsInfo) (lard.DataObs, lard.TextObs, lard.Flag, error) { + data, text, flag, err := convert(obs, ts) + if !ts.Offset.IsZero() { + if temp, ok := ts.Offset.AddTo(data.Obstime); ok { data.Obstime = temp text.Obstime = temp flag.Obstime = temp @@ -111,12 +78,12 @@ func ConvertProduct(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) return data, text, flag, err } -func ConvertEdata(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { +func convertEdata(obs *KdvhObs, ts *TsInfo) (lard.DataObs, lard.TextObs, lard.Flag, error) { var controlinfo string var valPtr *float32 - if val, err := strconv.ParseFloat(obs.data, 32); err != nil { - switch obs.flags { + if val, err := strconv.ParseFloat(obs.Data, 32); err != nil { + switch obs.Flags { case "70381", "70389", "90989": controlinfo = VALUE_REMOVED_BY_QC default: @@ -129,29 +96,31 @@ func ConvertEdata(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { } return lard.DataObs{ - Id: obs.Id, - Obstime: obs.obstime, + Id: ts.Id, + Obstime: obs.Obstime, Data: valPtr, }, lard.TextObs{ - Id: obs.Id, - Obstime: obs.obstime, - Text: &obs.data, + Id: ts.Id, + Obstime: obs.Obstime, + Text: &obs.Data, }, lard.Flag{ - Id: obs.Id, - Obstime: obs.obstime, - Useinfo: obs.Useinfo(), + Id: ts.Id, + Obstime: obs.Obstime, + Original: valPtr, + Corrected: valPtr, Controlinfo: &controlinfo, + Useinfo: useinfo(obs), }, nil } -func ConvertPdata(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { +func convertPdata(obs *KdvhObs, ts *TsInfo) (lard.DataObs, lard.TextObs, lard.Flag, error) { var controlinfo string var valPtr *float32 - if val, err := strconv.ParseFloat(obs.data, 32); err != nil { - switch obs.flags { + if val, err := strconv.ParseFloat(obs.Data, 32); err != nil { + switch obs.Flags { case "20389", "30389", "40389", "50383", "70381", "71381": controlinfo = VALUE_REMOVED_BY_QC default: @@ -164,7 +133,7 @@ func ConvertPdata(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { } else { valPtr = addr(float32(val)) - switch obs.flags { + switch obs.Flags { case "10319", "10329", "30319", "40319", "48929", "48999": controlinfo = VALUE_MANUALLY_INTERPOLATED case "20389", "30389", "40389", "50383", "70381", "71381", "99319": @@ -178,29 +147,31 @@ func ConvertPdata(obs KdvhObs) 
(lard.DataObs, lard.TextObs, lard.Flag, error) { } return lard.DataObs{ - Id: obs.Id, - Obstime: obs.obstime, + Id: ts.Id, + Obstime: obs.Obstime, Data: valPtr, }, lard.TextObs{ - Id: obs.Id, - Obstime: obs.obstime, - Text: &obs.data, + Id: ts.Id, + Obstime: obs.Obstime, + Text: &obs.Data, }, lard.Flag{ - Id: obs.Id, - Obstime: obs.obstime, - Useinfo: obs.Useinfo(), + Id: ts.Id, + Obstime: obs.Obstime, + Original: valPtr, + Corrected: valPtr, Controlinfo: &controlinfo, + Useinfo: useinfo(obs), }, nil } -func ConvertNdata(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { +func convertNdata(obs *KdvhObs, ts *TsInfo) (lard.DataObs, lard.TextObs, lard.Flag, error) { var controlinfo string var valPtr *float32 - if val, err := strconv.ParseFloat(obs.data, 32); err != nil { - switch obs.flags { + if val, err := strconv.ParseFloat(obs.Data, 32); err != nil { + switch obs.Flags { case "70389": controlinfo = VALUE_REMOVED_BY_QC default: @@ -213,7 +184,7 @@ func ConvertNdata(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { } else { valPtr = addr(float32(val)) - switch obs.flags { + switch obs.Flags { case "43325", "48325": controlinfo = VALUE_MANUALLY_ASSIGNED case "30319", "38929", "40315", "40319": @@ -229,51 +200,53 @@ func ConvertNdata(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { } return lard.DataObs{ - Id: obs.Id, - Obstime: obs.obstime, + Id: ts.Id, + Obstime: obs.Obstime, Data: valPtr, }, lard.TextObs{ - Id: obs.Id, - Obstime: obs.obstime, - Text: &obs.data, + Id: ts.Id, + Obstime: obs.Obstime, + Text: &obs.Data, }, lard.Flag{ - Id: obs.Id, - Obstime: obs.obstime, - Useinfo: obs.Useinfo(), + Id: ts.Id, + Obstime: obs.Obstime, + Original: valPtr, + Corrected: valPtr, Controlinfo: &controlinfo, + Useinfo: useinfo(obs), }, nil } -func ConvertVdata(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { +func convertVdata(obs *KdvhObs, ts *TsInfo) (lard.DataObs, lard.TextObs, lard.Flag, error) { var useinfo, controlinfo string var valPtr *float32 // set useinfo based on time - if h := obs.obstime.Hour(); h == 0 || h == 6 || h == 12 || h == 18 { + if h := obs.Obstime.Hour(); h == 0 || h == 6 || h == 12 || h == 18 { useinfo = COMPLETED_HQC } else { useinfo = INVALID_FLAGS } // set data and controlinfo - if val, err := strconv.ParseFloat(obs.data, 32); err != nil { + if val, err := strconv.ParseFloat(obs.Data, 32); err != nil { controlinfo = VALUE_MISSING } else { // super special treatment clause of T_VDATA.OT_24, so it will be the same as in kvalobs // add custom offset, because OT_24 in KDVH has been treated differently than OT_24 in kvalobs - if obs.Element == "OT_24" { + if ts.Element == "OT_24" { offset, err := period.Parse("PT18H") // fromtime_offset -PT6H, timespan P1D if err != nil { return lard.DataObs{}, lard.TextObs{}, lard.Flag{}, errors.New("could not parse period") } - temp, ok := offset.AddTo(obs.obstime) + temp, ok := offset.AddTo(obs.Obstime) if !ok { return lard.DataObs{}, lard.TextObs{}, lard.Flag{}, errors.New("could not add period") } - obs.obstime = temp + obs.Obstime = temp // convert from hours to minutes val *= 60.0 } @@ -283,42 +256,46 @@ func ConvertVdata(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { } return lard.DataObs{ - Id: obs.Id, - Obstime: obs.obstime, + Id: ts.Id, + Obstime: obs.Obstime, Data: valPtr, }, lard.TextObs{ - Id: obs.Id, - Obstime: obs.obstime, - Text: &obs.data, + Id: ts.Id, + Obstime: obs.Obstime, + Text: &obs.Data, }, lard.Flag{ - Id: obs.Id, - Obstime: obs.obstime, + Id: ts.Id, + 
Obstime: obs.Obstime, + Original: valPtr, + Corrected: valPtr, Useinfo: &useinfo, Controlinfo: &controlinfo, }, nil } -func ConvertDiurnalInterpolated(obs KdvhObs) (lard.DataObs, lard.TextObs, lard.Flag, error) { - val, err := strconv.ParseFloat(obs.data, 32) +func convertDiurnalInterpolated(obs *KdvhObs, ts *TsInfo) (lard.DataObs, lard.TextObs, lard.Flag, error) { + val, err := strconv.ParseFloat(obs.Data, 32) if err != nil { return lard.DataObs{}, lard.TextObs{}, lard.Flag{}, err } - + valPtr := addr(float32(val)) return lard.DataObs{ - Id: obs.Id, - Obstime: obs.obstime, - Data: addr(float32(val)), + Id: ts.Id, + Obstime: obs.Obstime, + Data: valPtr, }, lard.TextObs{ - Id: obs.Id, - Obstime: obs.obstime, - Text: &obs.data, + Id: ts.Id, + Obstime: obs.Obstime, + Text: &obs.Data, }, lard.Flag{ - Id: obs.Id, - Obstime: obs.obstime, + Id: ts.Id, + Obstime: obs.Obstime, + Original: valPtr, + Corrected: valPtr, Useinfo: addr(DIURNAL_INTERPOLATED_USEINFO), Controlinfo: addr(VALUE_MANUALLY_INTERPOLATED), }, nil diff --git a/migrations/kdvh/db/dump_functions.go b/migrations/kdvh/db/dump_functions.go new file mode 100644 index 00000000..345d027e --- /dev/null +++ b/migrations/kdvh/db/dump_functions.go @@ -0,0 +1,293 @@ +package db + +import ( + "context" + "database/sql" + "encoding/csv" + "errors" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + "slices" + "strconv" + "time" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +// Format string for date field in CSV files +const TIMEFORMAT string = "2006-01-02_15:04:05" + +// Error returned if no observations are found for a (station, element) pair +var EMPTY_QUERY_ERR error = errors.New("The query did not return any rows") + +// Struct representing a single record in the output CSV file +type Record struct { + Time time.Time `db:"time"` + Data sql.NullString `db:"data"` + Flag sql.NullString `db:"flag"` +} + +func fileExists(filename string) error { + if _, err := os.Stat(filename); err == nil { + return errors.New( + fmt.Sprintf( + "Skipping dump of %q because dumped file already exists and the --overwrite flag was not provided", + filename, + )) + } + return nil +} + +// Helper function for dumpByYear functinos Fetch min and max year from table, needed for tables that are dumped by year +func fetchYearRange(tableName, station string, pool *pgxpool.Pool) (int64, int64, error) { + var beginStr, endStr string + query := fmt.Sprintf("SELECT min(to_char(dato, 'yyyy')), max(to_char(dato, 'yyyy')) FROM %s WHERE stnr = $1", tableName) + + if err := pool.QueryRow(context.TODO(), query, station).Scan(&beginStr, &endStr); err != nil { + return 0, 0, fmt.Errorf("Could not query row: %v", err) + } + + begin, err := strconv.ParseInt(beginStr, 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("Could not parse year %q: %s", beginStr, err) + } + + end, err := strconv.ParseInt(endStr, 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("Could not parse year %q: %s", endStr, err) + } + + return begin, end, nil +} + +// This function is used when the table contains large amount of data +// (T_SECOND, T_MINUTE, T_10MINUTE) +func dumpByYear(path string, args dumpArgs, logStr string, overwrite bool, pool *pgxpool.Pool) error { + dataBegin, dataEnd, err := fetchYearRange(args.dataTable, args.station, pool) + if err != nil { + return err + } + + flagBegin, flagEnd, err := fetchYearRange(args.flagTable, args.station, pool) + if err != nil { + return err + } + + begin := min(dataBegin, flagBegin) + end := max(dataEnd, flagEnd) + + query := 
fmt.Sprintf( + `SELECT + dato AS time, + d.%[1]s AS data, + f.%[1]s AS flag + FROM + (SELECT dato, stnr, %[1]s FROM %[2]s + WHERE %[1]s IS NOT NULL AND stnr = $1 AND TO_CHAR(dato, 'yyyy') = $2) d + FULL OUTER JOIN + (SELECT dato, stnr, %[1]s FROM %[3]s + WHERE %[1]s IS NOT NULL AND stnr = $1 AND TO_CHAR(dato, 'yyyy') = $2) f + USING (dato)`, + args.element, + args.dataTable, + args.flagTable, + ) + + for year := begin; year < end; year++ { + yearPath := filepath.Join(path, fmt.Sprint(year)) + if err := os.MkdirAll(path, os.ModePerm); err != nil { + slog.Error(logStr + err.Error()) + continue + } + + filename := filepath.Join(yearPath, args.element+".csv") + if err := fileExists(filename); err != nil && !overwrite { + slog.Warn(logStr + err.Error()) + continue + } + + rows, err := pool.Query(context.TODO(), query, args.station, year) + if err != nil { + slog.Error(logStr + "Could not query KDVH - " + err.Error()) + continue + } + + if err := writeToCsv(filename, rows); err != nil { + slog.Error(logStr + err.Error()) + continue + } + } + + return nil +} + +// T_HOMOGEN_MONTH contains seasonal and annual data, plus other derivative +// data combining both of these. We decided to dump only the monthly data (season BETWEEN 1 AND 12) for +// - TAM (mean hourly temperature), and +// - RR (hourly precipitations, note that in Stinfosys this parameter is 'RR_1') +// +// We calculate the other data on the fly (outside this program) if needed. +func dumpHomogenMonth(path string, args dumpArgs, logStr string, overwrite bool, pool *pgxpool.Pool) error { + filename := filepath.Join(path, args.element+".csv") + if err := fileExists(filename); err != nil && !overwrite { + slog.Warn(logStr + err.Error()) + return err + } + + query := fmt.Sprintf( + `SELECT dato AS time, %s[1]s AS data, '' AS flag FROM T_HOMOGEN_MONTH + WHERE %s[1]s IS NOT NULL AND stnr = $1 AND season BETWEEN 1 AND 12`, + // NOTE: adding a dummy argument is the only way to suppress this stupid warning + args.element, "", + ) + + rows, err := pool.Query(context.TODO(), query, args.station) + if err != nil { + slog.Error(logStr + err.Error()) + return err + } + + if err := writeToCsv(filename, rows); err != nil { + slog.Error(logStr + err.Error()) + return err + } + + return nil +} + +// This function is used to dump tables that don't have a FLAG table, +// (T_METARDATA, T_HOMOGEN_DIURNAL) +func dumpDataOnly(path string, args dumpArgs, logStr string, overwrite bool, pool *pgxpool.Pool) error { + filename := filepath.Join(path, args.element+".csv") + if err := fileExists(filename); err != nil && !overwrite { + slog.Warn(logStr + err.Error()) + return err + } + + query := fmt.Sprintf( + `SELECT dato AS time, %[1]s AS data, '' AS flag FROM %[2]s + WHERE %[1]s IS NOT NULL AND stnr = $1`, + args.element, + args.dataTable, + ) + + rows, err := pool.Query(context.TODO(), query, args.station) + if err != nil { + slog.Error(logStr + err.Error()) + return err + } + + if err := writeToCsv(filename, rows); err != nil { + slog.Error(logStr + err.Error()) + return err + } + + return nil +} + +// This is the default dump function. 
+// It selects both data and flag tables for a specific (station, element) pair, +// and then performs a full outer join on the two subqueries +func dumpDataAndFlags(path string, args dumpArgs, logStr string, overwrite bool, pool *pgxpool.Pool) error { + filename := filepath.Join(path, args.element+".csv") + if err := fileExists(filename); err != nil && !overwrite { + slog.Warn(logStr + err.Error()) + return err + } + + query := fmt.Sprintf( + `SELECT + dato AS time, + d.%[1]s AS data, + f.%[1]s AS flag + FROM + (SELECT dato, %[1]s FROM %[2]s WHERE %[1]s IS NOT NULL AND stnr = $1) d + FULL OUTER JOIN + (SELECT dato, %[1]s FROM %[3]s WHERE %[1]s IS NOT NULL AND stnr = $1) f + USING (dato)`, + args.element, + args.dataTable, + args.flagTable, + ) + + rows, err := pool.Query(context.TODO(), query, args.station) + if err != nil { + slog.Error(logStr + err.Error()) + return err + } + + if err := writeToCsv(filename, rows); err != nil { + if !errors.Is(err, EMPTY_QUERY_ERR) { + slog.Error(logStr + err.Error()) + } + return err + } + + return nil +} + +// Dumps queried rows to file +func writeToCsv(filename string, rows pgx.Rows) error { + lines, err := sortRows(rows) + if err != nil { + return err + } + + // Return if query was empty + if len(lines) == 0 { + return EMPTY_QUERY_ERR + } + + file, err := os.Create(filename) + if err != nil { + return err + } + + err = writeElementFile(lines, file) + if closeErr := file.Close(); closeErr != nil { + return errors.Join(err, closeErr) + } + return err +} + +// Scans the rows and collects them in a slice of chronologically sorted lines +func sortRows(rows pgx.Rows) ([]Record, error) { + defer rows.Close() + + records, err := pgx.CollectRows(rows, pgx.RowToStructByName[Record]) + if err != nil { + return nil, errors.New("Could not collect rows: " + err.Error()) + } + + slices.SortFunc(records, func(a, b Record) int { + return a.Time.Compare(b.Time) + }) + + return records, rows.Err() +} + +// Writes queried (time | data | flag) columns to CSV +func writeElementFile(lines []Record, file io.Writer) error { + // Write number of lines as header + file.Write([]byte(fmt.Sprintf("%v\n", len(lines)))) + + writer := csv.NewWriter(file) + + record := make([]string, 3) + for _, l := range lines { + record[0] = l.Time.Format(TIMEFORMAT) + record[1] = l.Data.String + record[2] = l.Flag.String + + if err := writer.Write(record); err != nil { + return errors.New("Could not write to file: " + err.Error()) + } + } + + writer.Flush() + return writer.Error() +} diff --git a/migrations/kdvh/db/flag_test.go b/migrations/kdvh/db/flag_test.go new file mode 100644 index 00000000..212ab61a --- /dev/null +++ b/migrations/kdvh/db/flag_test.go @@ -0,0 +1,33 @@ +package db + +import ( + "testing" +) + +func TestFlagsAreValid(t *testing.T) { + type testCase struct { + input KdvhObs + expected bool + } + + cases := []testCase{ + {KdvhObs{Flags: "12309"}, true}, + {KdvhObs{Flags: "984.3"}, false}, + {KdvhObs{Flags: ".1111"}, false}, + {KdvhObs{Flags: "1234."}, false}, + {KdvhObs{Flags: "12.2.4"}, false}, + {KdvhObs{Flags: "12.343"}, false}, + {KdvhObs{Flags: ""}, false}, + {KdvhObs{Flags: "asdas"}, false}, + {KdvhObs{Flags: "12a3a"}, false}, + {KdvhObs{Flags: "1sdfl"}, false}, + } + + for _, c := range cases { + t.Log("Testing flag:", c.input.Flags) + + if result := flagsAreValid(&c.input); result != c.expected { + t.Errorf("Got %v, wanted %v", result, c.expected) + } + } +} diff --git a/migrations/kdvh/import/flags.go b/migrations/kdvh/db/flags.go similarity index 99% rename from 
migrations/kdvh/import/flags.go rename to migrations/kdvh/db/flags.go index 8fdc511b..89fd412f 100644 --- a/migrations/kdvh/import/flags.go +++ b/migrations/kdvh/db/flags.go @@ -1,4 +1,4 @@ -package port +package db // In kvalobs a flag is a 16 char string containg QC information about the observation: // Note: Missing numbers in the following lists are marked as reserved (not in use I guess?) diff --git a/migrations/kdvh/db/main.go b/migrations/kdvh/db/main.go index 81141221..03fb3ac6 100644 --- a/migrations/kdvh/db/main.go +++ b/migrations/kdvh/db/main.go @@ -1,5 +1,15 @@ package db +import ( + "migrate/stinfosys" + "migrate/utils" + "time" + + "github.com/rickb777/period" +) + +const KDVH_ENV_VAR string = "KDVH_PROXY_CONN_STRING" + // Map of all tables found in KDVH, with set max import year type KDVH struct { Tables map[string]*Table @@ -9,39 +19,58 @@ func Init() *KDVH { return &KDVH{map[string]*Table{ // Section 1: tables that need to be migrated entirely // TODO: figure out if we need to use the elem_code_paramid_level_sensor_t_edata table? - "T_EDATA": NewTable("T_EDATA", "T_EFLAG", "T_ELEM_EDATA").SetImportYear(3000), - "T_METARDATA": NewTable("T_METARDATA", "", "T_ELEM_METARDATA").SetImportYear(3000), + "T_EDATA": NewTable("T_EDATA", "T_EFLAG", "T_ELEM_EDATA").SetConvertFunc(convertEdata).SetImportYear(3000), + "T_METARDATA": NewTable("T_METARDATA", "", "T_ELEM_METARDATA").SetDumpFunc(dumpDataOnly).SetImportYear(3000), // Section 2: tables with some data in kvalobs, import only up to 2005-12-31 "T_ADATA": NewTable("T_ADATA", "T_AFLAG", "T_ELEM_OBS").SetImportYear(2006), "T_MDATA": NewTable("T_MDATA", "T_MFLAG", "T_ELEM_OBS").SetImportYear(2006), "T_TJ_DATA": NewTable("T_TJ_DATA", "T_TJ_FLAG", "T_ELEM_OBS").SetImportYear(2006), - "T_PDATA": NewTable("T_PDATA", "T_PFLAG", "T_ELEM_OBS").SetImportYear(2006), - "T_NDATA": NewTable("T_NDATA", "T_NFLAG", "T_ELEM_OBS").SetImportYear(2006), - "T_VDATA": NewTable("T_VDATA", "T_VFLAG", "T_ELEM_OBS").SetImportYear(2006), + "T_PDATA": NewTable("T_PDATA", "T_PFLAG", "T_ELEM_OBS").SetConvertFunc(convertPdata).SetImportYear(2006), + "T_NDATA": NewTable("T_NDATA", "T_NFLAG", "T_ELEM_OBS").SetConvertFunc(convertNdata).SetImportYear(2006), + "T_VDATA": NewTable("T_VDATA", "T_VFLAG", "T_ELEM_OBS").SetConvertFunc(convertVdata).SetImportYear(2006), "T_UTLANDDATA": NewTable("T_UTLANDDATA", "T_UTLANDFLAG", "T_ELEM_OBS").SetImportYear(2006), // Section 3: tables that should only be dumped - "T_10MINUTE_DATA": NewTable("T_10MINUTE_DATA", "T_10MINUTE_FLAG", "T_ELEM_OBS"), + "T_10MINUTE_DATA": NewTable("T_10MINUTE_DATA", "T_10MINUTE_FLAG", "T_ELEM_OBS").SetDumpFunc(dumpByYear), "T_ADATA_LEVEL": NewTable("T_ADATA_LEVEL", "T_AFLAG_LEVEL", "T_ELEM_OBS"), - "T_MINUTE_DATA": NewTable("T_MINUTE_DATA", "T_MINUTE_FLAG", "T_ELEM_OBS"), - "T_SECOND_DATA": NewTable("T_SECOND_DATA", "T_SECOND_FLAG", "T_ELEM_OBS"), + "T_MINUTE_DATA": NewTable("T_MINUTE_DATA", "T_MINUTE_FLAG", "T_ELEM_OBS").SetDumpFunc(dumpByYear), + "T_SECOND_DATA": NewTable("T_SECOND_DATA", "T_SECOND_FLAG", "T_ELEM_OBS").SetDumpFunc(dumpByYear), "T_CDCV_DATA": NewTable("T_CDCV_DATA", "T_CDCV_FLAG", "T_ELEM_EDATA"), "T_MERMAID": NewTable("T_MERMAID", "T_MERMAID_FLAG", "T_ELEM_EDATA"), "T_SVVDATA": NewTable("T_SVVDATA", "T_SVVFLAG", "T_ELEM_OBS"), // Section 4: special cases, namely digitized historical data - "T_MONTH": NewTable("T_MONTH", "T_MONTH_FLAG", "T_ELEM_MONTH").SetImportYear(1957), - "T_DIURNAL": NewTable("T_DIURNAL", "T_DIURNAL_FLAG", "T_ELEM_DIURNAL").SetImportYear(2006), - 
"T_HOMOGEN_DIURNAL": NewTable("T_HOMOGEN_DIURNAL", "", "T_ELEM_HOMOGEN_MONTH"), - "T_HOMOGEN_MONTH": NewTable("T_HOMOGEN_MONTH", "T_ELEM_HOMOGEN_MONTH", ""), + // TODO: I don't think we want to import these, they are products + "T_MONTH": NewTable("T_MONTH", "T_MONTH_FLAG", "T_ELEM_MONTH").SetConvertFunc(convertProduct).SetImportYear(1957), + "T_DIURNAL": NewTable("T_DIURNAL", "T_DIURNAL_FLAG", "T_ELEM_DIURNAL").SetConvertFunc(convertProduct).SetImportYear(2006), + "T_HOMOGEN_DIURNAL": NewTable("T_HOMOGEN_DIURNAL", "", "T_ELEM_HOMOGEN_MONTH").SetDumpFunc(dumpDataOnly).SetConvertFunc(convertProduct), + "T_HOMOGEN_MONTH": NewTable("T_HOMOGEN_MONTH", "T_ELEM_HOMOGEN_MONTH", "").SetDumpFunc(dumpHomogenMonth).SetConvertFunc(convertProduct), // Section 5: tables missing in the KDVH proxy: // 1. these exist in a separate database "T_AVINOR": NewTable("T_AVINOR", "T_AVINOR_FLAG", "T_ELEM_OBS"), "T_PROJDATA": NewTable("T_PROJDATA", "T_PROJFLAG", "T_ELEM_PROJ"), // 2. these are not in active use and don't need to be imported in LARD - "T_DIURNAL_INTERPOLATED": NewTable("T_DIURNAL_INTERPOLATED", "", ""), + "T_DIURNAL_INTERPOLATED": NewTable("T_DIURNAL_INTERPOLATED", "", "").SetConvertFunc(convertDiurnalInterpolated), "T_MONTH_INTERPOLATED": NewTable("T_MONTH_INTERPOLATED", "", ""), }} } + +// Struct that represent an observation in KDVH +type KdvhObs struct { + Obstime time.Time + Data string + Flags string +} + +// Convenience struct that holds information for a specific timeseries +type TsInfo struct { + Id int32 + Station int32 + Element string + Offset period.Period + Param stinfosys.Param + Timespan utils.TimeSpan + Logstr string +} diff --git a/migrations/kdvh/db/table.go b/migrations/kdvh/db/table.go index a1b9b787..aa28dc29 100644 --- a/migrations/kdvh/db/table.go +++ b/migrations/kdvh/db/table.go @@ -1,5 +1,11 @@ package db +import ( + "github.com/jackc/pgx/v5/pgxpool" + + "migrate/lard" +) + // In KDVH for each table name we usually have three separate tables: // 1. A DATA table containing observation values; // 2. A FLAG table containing quality control (QC) flags; @@ -22,6 +28,8 @@ type Table struct { ElemTableName string // Name of the ELEM table Path string // Directory name of where the dumped table is stored importUntil int // Import data only until the year specified by this field. Table import will be skipped, if `SetImportYear` is not called. + DumpFn DumpFunction + Convert ConvertFunction } // Creates default Table @@ -31,10 +39,42 @@ func NewTable(data, flag, elem string) *Table { FlagTableName: flag, ElemTableName: elem, // NOTE: '_combined' kept for backward compatibility with original scripts - Path: data + "_combined", + Path: data + "_combined", + DumpFn: dumpDataAndFlags, + Convert: convert, } } +// Function used to dump the KDVH table, see below +type DumpFunction func(path string, args dumpArgs, logStr string, overwrite bool, pool *pgxpool.Pool) error +type dumpArgs struct { + element string + station string + dataTable string + flagTable string +} + +// The following ConvertFunctions try to recover the original pair of `controlinfo` +// and `useinfo` generated by Kvalobs for an observation, based on `Obs.Flags` and `Obs.Data` +// Different KDVH tables need different ways to perform this conversion (defined in CONV_MAP). 
+// +// It returns three structs for each of the lard tables we are inserting into +type ConvertFunction func(*KdvhObs, *TsInfo) (lard.DataObs, lard.TextObs, lard.Flag, error) + +func (t *Table) Dump(path, element, station, logStr string, overwrite bool, pool *pgxpool.Pool) error { + return t.DumpFn(path, dumpArgs{element, station, t.TableName, t.FlagTableName}, logStr, overwrite, pool) +} + +func (t *Table) SetDumpFunc(fn DumpFunction) *Table { + t.DumpFn = fn + return t +} + +func (t *Table) SetConvertFunc(fn ConvertFunction) *Table { + t.Convert = fn + return t +} + // Specify the year until data should be imported func (t *Table) SetImportYear(year int) *Table { if year > 0 { diff --git a/migrations/kdvh/dump/dump.go b/migrations/kdvh/dump/dump.go index 23898e61..88ac0f7d 100644 --- a/migrations/kdvh/dump/dump.go +++ b/migrations/kdvh/dump/dump.go @@ -19,8 +19,11 @@ import ( // List of columns that we do not need to select when extracting the element codes from a KDVH table var INVALID_COLUMNS = []string{"dato", "stnr", "typeid", "season", "xxx"} -func DumpTable(table *db.Table, pool *pgxpool.Pool, config *DumpConfig) { - if err := os.MkdirAll(filepath.Join(config.BaseDir, table.Path), os.ModePerm); err != nil { +func DumpTable(table *db.Table, pool *pgxpool.Pool, config *Config) { + fmt.Printf("Dumping %s...\n", table.TableName) + defer fmt.Println(strings.Repeat("- ", 40)) + + if err := os.MkdirAll(filepath.Join(config.Path, table.Path), os.ModePerm); err != nil { slog.Error(err.Error()) return } @@ -35,42 +38,36 @@ func DumpTable(table *db.Table, pool *pgxpool.Pool, config *DumpConfig) { return } - dumpFunc := getDumpFunc(table) - // Used to limit connections to the database semaphore := make(chan struct{}, config.MaxConn) - bar := utils.NewBar(len(stations), table.TableName) - bar.RenderBlank() for _, station := range stations { - path := filepath.Join(config.BaseDir, table.Path, string(station)) + path := filepath.Join(config.Path, table.Path, station) if err := os.MkdirAll(path, os.ModePerm); err != nil { slog.Error(err.Error()) return } + bar := utils.NewBar(len(elements), fmt.Sprintf("%10s", station)) + bar.RenderBlank() + var wg sync.WaitGroup for _, element := range elements { + wg.Add(1) + // This blocks if the channel is full semaphore <- struct{}{} - - wg.Add(1) go func() { - defer wg.Done() - - err := dumpFunc( - path, - DumpArgs{ - element: element, - station: station, - dataTable: table.TableName, - flagTable: table.FlagTableName, - overwrite: config.Overwrite, - }, - pool, - ) + defer func() { + bar.Add(1) + wg.Done() + }() + + logStr := fmt.Sprintf("%s - %s - %s: ", table.TableName, station, element) + + err := table.Dump(path, element, station, logStr, config.Overwrite, pool) if err == nil { - slog.Info(fmt.Sprintf("%s - %s - %s: dumped successfully", table.TableName, station, element)) + slog.Info(logStr + "dumped successfully") } // Release semaphore @@ -78,18 +75,17 @@ func DumpTable(table *db.Table, pool *pgxpool.Pool, config *DumpConfig) { }() } wg.Wait() - bar.Add(1) } } // Fetches elements and filters them based on user input -func getElements(table *db.Table, pool *pgxpool.Pool, config *DumpConfig) ([]string, error) { +func getElements(table *db.Table, pool *pgxpool.Pool, config *Config) ([]string, error) { elements, err := fetchElements(table, pool) if err != nil { return nil, err } - filename := filepath.Join(config.BaseDir, table.Path, "elements.txt") + filename := filepath.Join(config.Path, table.Path, "elements.txt") if err := utils.SaveToFile(elements, 
filename); err != nil { slog.Warn(err.Error()) } @@ -138,13 +134,13 @@ func fetchElements(table *db.Table, pool *pgxpool.Pool) (elements []string, err } // Fetches station numbers and filters them based on user input -func getStations(table *db.Table, pool *pgxpool.Pool, config *DumpConfig) ([]string, error) { +func getStations(table *db.Table, pool *pgxpool.Pool, config *Config) ([]string, error) { stations, err := fetchStnrFromElemTable(table, pool) if err != nil { return nil, err } - filename := filepath.Join(config.BaseDir, table.Path, "stations.txt") + filename := filepath.Join(config.Path, table.Path, "stations.txt") if err := utils.SaveToFile(stations, filename); err != nil { slog.Warn(err.Error()) } diff --git a/migrations/kdvh/dump/dump_functions.go b/migrations/kdvh/dump/dump_functions.go deleted file mode 100644 index db6fb82f..00000000 --- a/migrations/kdvh/dump/dump_functions.go +++ /dev/null @@ -1,237 +0,0 @@ -package dump - -import ( - "context" - "errors" - "fmt" - "log/slog" - "os" - "path/filepath" - "strconv" - - "github.com/jackc/pgx/v5/pgxpool" - - "migrate/kdvh/db" -) - -// Function used to dump the KDVH table, see below -type DumpFunction func(path string, meta DumpArgs, pool *pgxpool.Pool) error -type DumpArgs struct { - element string - station string - dataTable string - flagTable string - overwrite bool - logStr string -} - -func getDumpFunc(table *db.Table) DumpFunction { - switch table.TableName { - case "T_METARDATA", "T_HOMOGEN_DIURNAL": - return dumpDataOnly - case "T_SECOND_DATA", "T_MINUTE_DATA", "T_10MINUTE_DATA": - return dumpByYear - case "T_HOMOGEN_MONTH": - return dumpHomogenMonth - } - return dumpDataAndFlags -} - -func fileExists(filename string, overwrite bool) error { - if _, err := os.Stat(filename); err == nil && !overwrite { - return errors.New( - fmt.Sprintf( - "Skipping dump of '%s' because dumped file already exists and the --overwrite flag was not provided", - filename, - )) - } - return nil -} - -// Helper function for dumpByYear functinos Fetch min and max year from table, needed for tables that are dumped by year -func fetchYearRange(tableName, station string, pool *pgxpool.Pool) (int64, int64, error) { - var beginStr, endStr string - query := fmt.Sprintf("SELECT min(to_char(dato, 'yyyy')), max(to_char(dato, 'yyyy')) FROM %s WHERE stnr = $1", tableName) - - if err := pool.QueryRow(context.TODO(), query, station).Scan(&beginStr, &endStr); err != nil { - return 0, 0, fmt.Errorf("Could not query row: %v", err) - } - - begin, err := strconv.ParseInt(beginStr, 10, 64) - if err != nil { - return 0, 0, fmt.Errorf("Could not parse year '%s': %s", beginStr, err) - } - - end, err := strconv.ParseInt(endStr, 10, 64) - if err != nil { - return 0, 0, fmt.Errorf("Could not parse year '%s': %s", endStr, err) - } - - return begin, end, nil -} - -// This function is used when the table contains large amount of data -// (T_SECOND, T_MINUTE, T_10MINUTE) -func dumpByYear(path string, meta DumpArgs, pool *pgxpool.Pool) error { - dataBegin, dataEnd, err := fetchYearRange(meta.dataTable, meta.station, pool) - if err != nil { - return err - } - - flagBegin, flagEnd, err := fetchYearRange(meta.flagTable, meta.station, pool) - if err != nil { - return err - } - - begin := min(dataBegin, flagBegin) - end := max(dataEnd, flagEnd) - - query := fmt.Sprintf( - `SELECT - dato AS time, - d.%[1]s AS data, - f.%[1]s AS flag - FROM - (SELECT dato, stnr, %[1]s FROM %[2]s - WHERE %[1]s IS NOT NULL AND stnr = $1 AND TO_CHAR(dato, 'yyyy') = $2) d - FULL OUTER JOIN - (SELECT 
dato, stnr, %[1]s FROM %[3]s - WHERE %[1]s IS NOT NULL AND stnr = $1 AND TO_CHAR(dato, 'yyyy') = $2) f - USING (dato)`, - meta.element, - meta.dataTable, - meta.flagTable, - ) - - for year := begin; year < end; year++ { - yearPath := filepath.Join(path, fmt.Sprint(year)) - if err := os.MkdirAll(path, os.ModePerm); err != nil { - slog.Error(meta.logStr + err.Error()) - continue - } - - filename := filepath.Join(yearPath, meta.element+".csv") - if err := fileExists(filename, meta.overwrite); err != nil { - slog.Warn(meta.logStr + err.Error()) - continue - } - - rows, err := pool.Query(context.TODO(), query, meta.station, year) - if err != nil { - slog.Error(meta.logStr + fmt.Sprint("Could not query KDVH: ", err)) - continue - } - - if err := writeToCsv(filename, rows); err != nil { - slog.Error(meta.logStr + err.Error()) - continue - } - } - - return nil -} - -// T_HOMOGEN_MONTH contains seasonal and annual data, plus other derivative -// data combining both of these. We decided to dump only the monthly data (season BETWEEN 1 AND 12) for -// - TAM (mean hourly temperature), and -// - RR (hourly precipitations, note that in Stinfosys this parameter is 'RR_1') -// -// We calculate the other data on the fly (outside this program) if needed. -func dumpHomogenMonth(path string, meta DumpArgs, pool *pgxpool.Pool) error { - filename := filepath.Join(path, meta.element+".csv") - if err := fileExists(filename, meta.overwrite); err != nil { - slog.Warn(meta.logStr + err.Error()) - return err - } - - query := fmt.Sprintf( - `SELECT dato AS time, %s[1]s AS data, '' AS flag FROM T_HOMOGEN_MONTH - WHERE %s[1]s IS NOT NULL AND stnr = $1 AND season BETWEEN 1 AND 12`, - // NOTE: adding a dummy argument is the only way to suppress this stupid warning - meta.element, "", - ) - - rows, err := pool.Query(context.TODO(), query, meta.station) - if err != nil { - slog.Error(meta.logStr + err.Error()) - return err - } - - if err := writeToCsv(filename, rows); err != nil { - slog.Error(meta.logStr + err.Error()) - return err - } - - return nil -} - -// This function is used to dump tables that don't have a FLAG table, -// (T_METARDATA, T_HOMOGEN_DIURNAL) -func dumpDataOnly(path string, meta DumpArgs, pool *pgxpool.Pool) error { - filename := filepath.Join(path, meta.element+".csv") - if err := fileExists(filename, meta.overwrite); err != nil { - slog.Warn(meta.logStr + err.Error()) - return err - } - - query := fmt.Sprintf( - `SELECT dato AS time, %[1]s AS data, '' AS flag FROM %[2]s - WHERE %[1]s IS NOT NULL AND stnr = $1`, - meta.element, - meta.dataTable, - ) - - rows, err := pool.Query(context.TODO(), query, meta.station) - if err != nil { - slog.Error(meta.logStr + err.Error()) - return err - } - - if err := writeToCsv(filename, rows); err != nil { - slog.Error(meta.logStr + err.Error()) - return err - } - - return nil -} - -// This is the default dump function. 
-// It selects both data and flag tables for a specific (station, element) pair, -// and then performs a full outer join on the two subqueries -func dumpDataAndFlags(path string, meta DumpArgs, pool *pgxpool.Pool) error { - filename := filepath.Join(path, meta.element+".csv") - if err := fileExists(filename, meta.overwrite); err != nil { - slog.Warn(meta.logStr + err.Error()) - return err - } - - query := fmt.Sprintf( - `SELECT - dato AS time, - d.%[1]s AS data, - f.%[1]s AS flag - FROM - (SELECT dato, %[1]s FROM %[2]s WHERE %[1]s IS NOT NULL AND stnr = $1) d - FULL OUTER JOIN - (SELECT dato, %[1]s FROM %[3]s WHERE %[1]s IS NOT NULL AND stnr = $1) f - USING (dato)`, - meta.element, - meta.dataTable, - meta.flagTable, - ) - - rows, err := pool.Query(context.TODO(), query, meta.station) - if err != nil { - slog.Error(meta.logStr + err.Error()) - return err - } - - if err := writeToCsv(filename, rows); err != nil { - if !errors.Is(err, EMPTY_QUERY_ERR) { - slog.Error(meta.logStr + err.Error()) - } - return err - } - - return nil -} diff --git a/migrations/kdvh/dump/main.go b/migrations/kdvh/dump/main.go index 6227b989..51d4541a 100644 --- a/migrations/kdvh/dump/main.go +++ b/migrations/kdvh/dump/main.go @@ -12,21 +12,20 @@ import ( "migrate/utils" ) -type DumpConfig struct { - BaseDir string `short:"p" long:"path" default:"./dumps/kdvh" description:"Location the dumped data will be stored in"` - Tables []string `short:"t" delimiter:"," long:"table" default:"" description:"Optional comma separated list of table names. By default all available tables are processed"` - Stations []string `short:"s" delimiter:"," long:"stnr" default:"" description:"Optional comma separated list of stations IDs. By default all station IDs are processed"` - Elements []string `short:"e" delimiter:"," long:"elem" default:"" description:"Optional comma separated list of element codes. 
By default all element codes are processed"` - Overwrite bool `long:"overwrite" description:"Overwrite any existing dumped files"` - Email []string `long:"email" delimiter:"," description:"Optional comma separated list of email addresses used to notify if the program crashed"` - MaxConn int `short:"n" long:"conn" default:"4" description:"Max number of concurrent connections allowed to KDVH"` +type Config struct { + Path string `arg:"-p" default:"./dumps/kdvh" help:"Location the dumped data will be stored in"` + Tables []string `arg:"-t" help:"Optional space separated list of table names"` + Stations []string `arg:"-s" help:"Optional space separated list of stations IDs"` + Elements []string `arg:"-e" help:"Optional space separated list of element codes"` + Overwrite bool `help:"Overwrite any existing dumped files"` + MaxConn int `arg:"-n" default:"4" help:"Max number of allowed concurrent connections to KDVH"` } -func (config *DumpConfig) Execute([]string) error { - pool, err := pgxpool.New(context.Background(), os.Getenv("KDVH_PROXY_CONN")) +func (config *Config) Execute() { + pool, err := pgxpool.New(context.Background(), os.Getenv(db.KDVH_ENV_VAR)) if err != nil { slog.Error(err.Error()) - return nil + return } kdvh := db.Init() @@ -38,6 +37,4 @@ func (config *DumpConfig) Execute([]string) error { utils.SetLogFile(table.TableName, "dump") DumpTable(table, pool, config) } - - return nil } diff --git a/migrations/kdvh/dump/write.go b/migrations/kdvh/dump/write.go deleted file mode 100644 index 5e4aec9d..00000000 --- a/migrations/kdvh/dump/write.go +++ /dev/null @@ -1,89 +0,0 @@ -package dump - -import ( - "database/sql" - "encoding/csv" - "errors" - "fmt" - "io" - "os" - "slices" - "time" - - "github.com/jackc/pgx/v5" -) - -// Format string for date field in CSV files -const TIMEFORMAT string = "2006-01-02_15:04:05" - -// Error returned if no observations are found for a (station, element) pair -var EMPTY_QUERY_ERR error = errors.New("The query did not return any rows") - -// Struct representing a single record in the output CSV file -type Record struct { - Time time.Time `db:"time"` - Data sql.NullString `db:"data"` - Flag sql.NullString `db:"flag"` -} - -// Dumps queried rows to file -func writeToCsv(filename string, rows pgx.Rows) error { - lines, err := sortRows(rows) - if err != nil { - return err - } - - // Return if query was empty - if len(lines) == 0 { - return EMPTY_QUERY_ERR - } - - file, err := os.Create(filename) - if err != nil { - return err - } - - err = writeElementFile(lines, file) - if closeErr := file.Close(); closeErr != nil { - return errors.Join(err, closeErr) - } - return err -} - -// Scans the rows and collects them in a slice of chronologically sorted lines -func sortRows(rows pgx.Rows) ([]Record, error) { - defer rows.Close() - - records, err := pgx.CollectRows(rows, pgx.RowToStructByName[Record]) - if err != nil { - return nil, errors.New("Could not collect rows: " + err.Error()) - } - - slices.SortFunc(records, func(a, b Record) int { - return a.Time.Compare(b.Time) - }) - - return records, rows.Err() -} - -// Writes queried (time | data | flag) columns to CSV -func writeElementFile(lines []Record, file io.Writer) error { - // Write number of lines as header - file.Write([]byte(fmt.Sprintf("%v\n", len(lines)))) - - writer := csv.NewWriter(file) - - record := make([]string, 3) - for _, l := range lines { - record[0] = l.Time.Format(TIMEFORMAT) - record[1] = l.Data.String - record[2] = l.Flag.String - - if err := writer.Write(record); err != nil { - return 
errors.New("Could not write to file: " + err.Error()) - } - } - - writer.Flush() - return writer.Error() -} diff --git a/migrations/kdvh/import/cache/kdvh.go b/migrations/kdvh/import/cache/kdvh.go index 0ca938cd..d756c650 100644 --- a/migrations/kdvh/import/cache/kdvh.go +++ b/migrations/kdvh/import/cache/kdvh.go @@ -10,43 +10,40 @@ import ( "github.com/jackc/pgx/v5" - "migrate/kdvh/db" + kdvh "migrate/kdvh/db" + "migrate/stinfosys" + "migrate/utils" ) // Map of `from_time` and `to_time` for each (table, station, element) triplet. Not present for all parameters -type KDVHMap = map[KDVHKey]Timespan +type KDVHMap = map[KDVHKey]utils.TimeSpan // Used for lookup of fromtime and totime from KDVH type KDVHKey struct { - Inner StinfoKey + Inner stinfosys.Key Station int32 } func newKDVHKey(elem, table string, stnr int32) KDVHKey { - return KDVHKey{StinfoKey{ElemCode: elem, TableName: table}, stnr} + return KDVHKey{stinfosys.Key{ElemCode: elem, TableName: table}, stnr} } -// Timespan stored in KDVH for a given (table, station, element) triplet -type Timespan struct { - FromTime *time.Time `db:"fdato"` - ToTime *time.Time `db:"tdato"` -} - -func cacheKDVH(tables, stations, elements []string, kdvh *db.KDVH) KDVHMap { +// Cache timeseries timespan from KDVH +func cacheKDVH(tables, stations, elements []string, database *kdvh.KDVH) KDVHMap { cache := make(KDVHMap) slog.Info("Connecting to KDVH proxy to cache metadata") ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - conn, err := pgx.Connect(ctx, os.Getenv("KDVH_PROXY_CONN")) + conn, err := pgx.Connect(ctx, os.Getenv(kdvh.KDVH_ENV_VAR)) if err != nil { slog.Error("Could not connect to KDVH proxy. Make sure to be connected to the VPN: " + err.Error()) os.Exit(1) } defer conn.Close(context.TODO()) - for _, t := range kdvh.Tables { + for _, t := range database.Tables { if len(tables) > 0 && !slices.Contains(tables, t.TableName) { continue } @@ -67,13 +64,14 @@ func cacheKDVH(tables, stations, elements []string, kdvh *db.KDVH) KDVHMap { for rows.Next() { var key KDVHKey - var span Timespan + var span utils.TimeSpan + err := rows.Scan( &key.Inner.TableName, &key.Station, &key.Inner.ElemCode, - &span.FromTime, - &span.ToTime, + &span.From, + &span.To, ) if err != nil { diff --git a/migrations/kdvh/import/cache/main.go b/migrations/kdvh/import/cache/main.go index 243c6f6a..7613f013 100644 --- a/migrations/kdvh/import/cache/main.go +++ b/migrations/kdvh/import/cache/main.go @@ -1,74 +1,44 @@ package cache import ( - "context" "errors" "fmt" "log/slog" - "os" - "time" - "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgxpool" - "github.com/rickb777/period" - "migrate/kdvh/db" + kdvh "migrate/kdvh/db" "migrate/lard" + "migrate/stinfosys" + "migrate/utils" ) type Cache struct { - Offsets OffsetMap - Stinfo StinfoMap - KDVH KDVHMap - ParamPermits ParamPermitMap - StationPermits StationPermitMap + Offsets OffsetMap + Timespans KDVHMap + Elements stinfosys.ElemMap + Permits stinfosys.PermitMaps } // Caches all the metadata needed for import of KDVH tables. // If any error occurs inside here the program will exit. -func CacheMetadata(tables, stations, elements []string, kdvh *db.KDVH) *Cache { - slog.Info("Connecting to Stinfosys to cache metadata") - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - - conn, err := pgx.Connect(ctx, os.Getenv("STINFO_STRING")) - if err != nil { - slog.Error("Could not connect to Stinfosys. Make sure to be connected to the VPN. 
" + err.Error()) - os.Exit(1) - } - - stinfoMeta := cacheStinfoMeta(tables, elements, kdvh, conn) - stationPermits := cacheStationPermits(conn) - paramPermits := cacheParamPermits(conn) - - conn.Close(context.TODO()) +func CacheMetadata(tables, stations, elements []string, database *kdvh.KDVH) *Cache { + stconn, ctx := stinfosys.Connect() + defer stconn.Close(ctx) return &Cache{ - Stinfo: stinfoMeta, - StationPermits: stationPermits, - ParamPermits: paramPermits, - Offsets: cacheParamOffsets(), - KDVH: cacheKDVH(tables, stations, elements, kdvh), + Elements: stinfosys.CacheElemMap(stconn), + Permits: stinfosys.NewPermitTables(stconn), + Offsets: cacheParamOffsets(), + Timespans: cacheKDVH(tables, stations, elements, database), } } -// Convenience struct that holds information for a specific timeseries -type TsInfo struct { - Id int32 - Station int32 - Element string - Offset period.Period - Param StinfoParam - Span Timespan - Logstr string - IsOpen bool -} - -func (cache *Cache) NewTsInfo(table, element string, station int32, pool *pgxpool.Pool) (*TsInfo, error) { - logstr := fmt.Sprintf("%v - %v - %v: ", table, station, element) +func (cache *Cache) NewTsInfo(table, element string, station int32, pool *pgxpool.Pool) (*kdvh.TsInfo, error) { + logstr := fmt.Sprintf("[%v - %v - %v]: ", table, station, element) key := newKDVHKey(element, table, station) - param, ok := cache.Stinfo[key.Inner] + param, ok := cache.Elements[key.Inner] if !ok { // TODO: should it fail here? How do we deal with data without metadata? slog.Error(logstr + "Missing metadata in Stinfosys") @@ -76,9 +46,8 @@ func (cache *Cache) NewTsInfo(table, element string, station int32, pool *pgxpoo } // Check if data for this station/element is restricted - isOpen := cache.timeseriesIsOpen(station, param.TypeID, param.ParamID) - // TODO: eventually use this to choose which table to use on insert + isOpen := cache.Permits.TimeseriesIsOpen(station, param.TypeID, param.ParamID) if !isOpen { slog.Warn(logstr + "Timeseries data is restricted") return nil, errors.New("Restricted data") @@ -88,7 +57,7 @@ func (cache *Cache) NewTsInfo(table, element string, station int32, pool *pgxpoo offset := cache.Offsets[key.Inner] // No need to check for `!ok`, timespan will be ignored if not in the map - span := cache.KDVH[key] + timespan, ok := cache.Timespans[key] label := lard.Label{ StationID: station, @@ -98,22 +67,20 @@ func (cache *Cache) NewTsInfo(table, element string, station int32, pool *pgxpoo Level: param.Hlevel, } - tsid, err := lard.GetTimeseriesID(label, param.Fromtime, pool) + // TODO: are Param.Fromtime and Span.From different? 
+ tsid, err := lard.GetTimeseriesID(&label, utils.TimeSpan{From: ¶m.Fromtime, To: timespan.To}, pool) if err != nil { slog.Error(logstr + "could not obtain timeseries - " + err.Error()) return nil, err } - // TODO: check if station is restricted - - return &TsInfo{ - Id: tsid, - Station: station, - Element: element, - Offset: offset, - Param: param, - Span: span, - Logstr: logstr, - IsOpen: isOpen, + return &kdvh.TsInfo{ + Id: tsid, + Station: station, + Element: element, + Offset: offset, + Param: param, + Timespan: timespan, + Logstr: logstr, }, nil } diff --git a/migrations/kdvh/import/cache/offsets.go b/migrations/kdvh/import/cache/offsets.go index e39a934b..e51c490b 100644 --- a/migrations/kdvh/import/cache/offsets.go +++ b/migrations/kdvh/import/cache/offsets.go @@ -2,6 +2,7 @@ package cache import ( "log/slog" + "migrate/stinfosys" "os" "github.com/gocarina/gocsv" @@ -9,7 +10,7 @@ import ( ) // Map of offsets used to correct KDVH times for specific parameters -type OffsetMap = map[StinfoKey]period.Period +type OffsetMap = map[stinfosys.Key]period.Period // Caches how to modify the obstime (in KDVH) for certain paramids func cacheParamOffsets() OffsetMap { @@ -58,7 +59,7 @@ func cacheParamOffsets() OffsetMap { os.Exit(1) } - cache[StinfoKey{ElemCode: row.ElemCode, TableName: row.TableName}] = migrationOffset + cache[stinfosys.Key{ElemCode: row.ElemCode, TableName: row.TableName}] = migrationOffset } return cache diff --git a/migrations/kdvh/import/cache/stinfosys.go b/migrations/kdvh/import/cache/stinfosys.go deleted file mode 100644 index c6af589f..00000000 --- a/migrations/kdvh/import/cache/stinfosys.go +++ /dev/null @@ -1,84 +0,0 @@ -package cache - -import ( - "context" - "log/slog" - "os" - "slices" - "time" - - "github.com/jackc/pgx/v5" - - "migrate/kdvh/db" -) - -// Map of metadata used to query timeseries ID in LARD -type StinfoMap = map[StinfoKey]StinfoParam - -// StinfoKey is used for lookup of parameter offsets and metadata from Stinfosys -type StinfoKey struct { - ElemCode string - TableName string -} - -// Subset of elem_map_cfnames_param query with only param info -type StinfoParam struct { - TypeID int32 - ParamID int32 - Hlevel *int32 - Sensor int32 - Fromtime time.Time - IsScalar bool -} - -// Save metadata for later use by quering Stinfosys -func cacheStinfoMeta(tables, elements []string, kdvh *db.KDVH, conn *pgx.Conn) StinfoMap { - cache := make(StinfoMap) - - for _, table := range kdvh.Tables { - if len(tables) > 0 && !slices.Contains(tables, table.TableName) { - continue - } - - // select paramid, elem_code, scalar from elem_map_cfnames_param join param using(paramid) where scalar = false - query := `SELECT elem_code, table_name, typeid, paramid, hlevel, sensor, fromtime, scalar - FROM elem_map_cfnames_param - JOIN param USING(paramid) - WHERE table_name = $1 - AND ($2::text[] = '{}' OR elem_code = ANY($2))` - - rows, err := conn.Query(context.TODO(), query, table.TableName, elements) - if err != nil { - slog.Error(err.Error()) - os.Exit(1) - } - - for rows.Next() { - var key StinfoKey - var param StinfoParam - err := rows.Scan( - &key.ElemCode, - &key.TableName, - ¶m.TypeID, - ¶m.ParamID, - ¶m.Hlevel, - ¶m.Sensor, - ¶m.Fromtime, - ¶m.IsScalar, - ) - if err != nil { - slog.Error(err.Error()) - os.Exit(1) - } - - cache[key] = param - } - - if rows.Err() != nil { - slog.Error(rows.Err().Error()) - os.Exit(1) - } - } - - return cache -} diff --git a/migrations/kdvh/import/import.go b/migrations/kdvh/import/import.go index dd48fbf0..36f3729c 100644 --- 
a/migrations/kdvh/import/import.go +++ b/migrations/kdvh/import/import.go @@ -15,7 +15,7 @@ import ( "github.com/jackc/pgx/v5/pgxpool" - "migrate/kdvh/db" + kdvh "migrate/kdvh/db" "migrate/kdvh/import/cache" "migrate/lard" "migrate/utils" @@ -24,17 +24,16 @@ import ( // TODO: add CALL_SIGN? It's not in stinfosys? var INVALID_ELEMENTS = []string{"TYPEID", "TAM_NORMAL_9120", "RRA_NORMAL_9120", "OT", "OTN", "OTX", "DD06", "DD12", "DD18"} -func ImportTable(table *db.Table, cache *cache.Cache, pool *pgxpool.Pool, config *Config) (rowsInserted int64) { - stations, err := os.ReadDir(filepath.Join(config.BaseDir, table.Path)) +func ImportTable(table *kdvh.Table, cache *cache.Cache, pool *pgxpool.Pool, config *Config) (rowsInserted int64) { + fmt.Printf("Importing %s...\n", table.TableName) + defer fmt.Println(strings.Repeat("- ", 40)) + + stations, err := os.ReadDir(filepath.Join(config.Path, table.Path)) if err != nil { slog.Warn(err.Error()) return 0 } - convFunc := getConvertFunc(table) - - bar := utils.NewBar(len(stations), table.TableName) - bar.RenderBlank() for _, station := range stations { stnr, err := getStationNumber(station, config.Stations) if err != nil { @@ -44,68 +43,66 @@ func ImportTable(table *db.Table, cache *cache.Cache, pool *pgxpool.Pool, config continue } - dir := filepath.Join(config.BaseDir, table.Path, station.Name()) - elements, err := os.ReadDir(dir) + stationDir := filepath.Join(config.Path, table.Path, station.Name()) + elements, err := os.ReadDir(stationDir) if err != nil { slog.Warn(err.Error()) continue } + bar := utils.NewBar(len(elements), fmt.Sprintf("%10s", station.Name())) + bar.RenderBlank() + var wg sync.WaitGroup for _, element := range elements { - elemCode, err := getElementCode(element, config.Elements) - if err != nil { - if config.Verbose { - slog.Info(err.Error()) - } - continue - } - wg.Add(1) go func() { - defer wg.Done() + defer func() { + bar.Add(1) + wg.Done() + }() + + elemCode, err := getElementCode(element, config.Elements) + if err != nil { + if config.Verbose { + slog.Info(err.Error()) + } + return + } tsInfo, err := cache.NewTsInfo(table.TableName, elemCode, stnr, pool) if err != nil { return } - filename := filepath.Join(dir, element.Name()) - data, text, flag, err := parseData(filename, tsInfo, convFunc, table, config) + filename := filepath.Join(stationDir, element.Name()) + data, text, flag, err := parseData(filename, tsInfo, table, config) if err != nil { return } var count int64 - if !(config.Skip == "data") { - if tsInfo.Param.IsScalar { - count, err = lard.InsertData(data, pool, tsInfo.Logstr) - if err != nil { - slog.Error(tsInfo.Logstr + "failed data bulk insertion - " + err.Error()) - return - } - } else { - count, err = lard.InsertTextData(text, pool, tsInfo.Logstr) - if err != nil { - slog.Error(tsInfo.Logstr + "failed non-scalar data bulk insertion - " + err.Error()) - return - } - // TODO: should we skip inserting flags here? 
In kvalobs there are no flags for text data - // return count, nil + if tsInfo.Param.IsScalar { + count, err = lard.InsertData(data, pool, tsInfo.Logstr) + if err != nil { + slog.Error(tsInfo.Logstr + "failed data bulk insertion - " + err.Error()) + return } - } - - if !(config.Skip == "flags") { if err := lard.InsertFlags(flag, pool, tsInfo.Logstr); err != nil { slog.Error(tsInfo.Logstr + "failed flag bulk insertion - " + err.Error()) } + } else { + count, err = lard.InsertTextData(text, pool, tsInfo.Logstr) + if err != nil { + slog.Error(tsInfo.Logstr + "failed non-scalar data bulk insertion - " + err.Error()) + return + } } rowsInserted += count }() } wg.Wait() - bar.Add(1) } outputStr := fmt.Sprintf("%v: %v total rows inserted", table.TableName, rowsInserted) @@ -140,18 +137,18 @@ func getElementCode(element os.DirEntry, elementList []string) (string, error) { elemCode := strings.ToUpper(strings.TrimSuffix(element.Name(), ".csv")) if len(elementList) > 0 && !slices.Contains(elementList, elemCode) { - return "", errors.New(fmt.Sprintf("Element '%s' not in the list, skipping", elemCode)) + return "", errors.New(fmt.Sprintf("Element %q not in the list, skipping", elemCode)) } if elemcodeIsInvalid(elemCode) { - return "", errors.New(fmt.Sprintf("Element '%s' not set for import, skipping", elemCode)) + return "", errors.New(fmt.Sprintf("Element %q not set for import, skipping", elemCode)) } return elemCode, nil } // Parses the observations in the CSV file, converts them with the table // ConvertFunction and returns three arrays that can be passed to pgx.CopyFromRows -func parseData(filename string, tsInfo *cache.TsInfo, convFunc ConvertFunction, table *db.Table, config *Config) ([][]any, [][]any, [][]any, error) { +func parseData(filename string, tsInfo *kdvh.TsInfo, table *kdvh.Table, config *Config) ([][]any, [][]any, [][]any, error) { file, err := os.Open(filename) if err != nil { slog.Warn(err.Error()) @@ -181,9 +178,9 @@ func parseData(filename string, tsInfo *cache.TsInfo, convFunc ConvertFunction, } // Only import data between KDVH's defined fromtime and totime - if tsInfo.Span.FromTime != nil && obsTime.Sub(*tsInfo.Span.FromTime) < 0 { + if tsInfo.Timespan.From != nil && obsTime.Sub(*tsInfo.Timespan.From) < 0 { continue - } else if tsInfo.Span.ToTime != nil && obsTime.Sub(*tsInfo.Span.ToTime) > 0 { + } else if tsInfo.Timespan.To != nil && obsTime.Sub(*tsInfo.Timespan.To) > 0 { break } @@ -191,7 +188,8 @@ func parseData(filename string, tsInfo *cache.TsInfo, convFunc ConvertFunction, break } - dataRow, textRow, flagRow, err := convFunc(KdvhObs{tsInfo, obsTime, cols[1], cols[2]}) + obs := kdvh.KdvhObs{Obstime: obsTime, Data: cols[1], Flags: cols[2]} + dataRow, textRow, flagRow, err := table.Convert(&obs, tsInfo) if err != nil { return nil, nil, nil, err } diff --git a/migrations/kdvh/import/import_test.go b/migrations/kdvh/import/import_test.go deleted file mode 100644 index d5f8eafb..00000000 --- a/migrations/kdvh/import/import_test.go +++ /dev/null @@ -1,31 +0,0 @@ -package port - -import "testing" - -func TestFlagsAreValid(t *testing.T) { - type testCase struct { - input KdvhObs - expected bool - } - - cases := []testCase{ - {KdvhObs{flags: "12309"}, true}, - {KdvhObs{flags: "984.3"}, false}, - {KdvhObs{flags: ".1111"}, false}, - {KdvhObs{flags: "1234."}, false}, - {KdvhObs{flags: "12.2.4"}, false}, - {KdvhObs{flags: "12.343"}, false}, - {KdvhObs{flags: ""}, false}, - {KdvhObs{flags: "asdas"}, false}, - {KdvhObs{flags: "12a3a"}, false}, - {KdvhObs{flags: "1sdfl"}, false}, - } - - 
for _, c := range cases { - t.Log("Testing flag:", c.input.flags) - - if result := c.input.flagsAreValid(); result != c.expected { - t.Errorf("Got %v, wanted %v", result, c.expected) - } - } -} diff --git a/migrations/kdvh/import/main.go b/migrations/kdvh/import/main.go index e45f9dd9..223900e5 100644 --- a/migrations/kdvh/import/main.go +++ b/migrations/kdvh/import/main.go @@ -10,53 +10,55 @@ import ( "github.com/jackc/pgx/v5/pgxpool" - "migrate/kdvh/db" + kdvh "migrate/kdvh/db" "migrate/kdvh/import/cache" + "migrate/lard" "migrate/utils" ) type Config struct { - Verbose bool `short:"v" description:"Increase verbosity level"` - BaseDir string `short:"p" long:"path" default:"./dumps/kdvh" description:"Location the dumped data will be stored in"` - Tables []string `short:"t" long:"table" delimiter:"," default:"" description:"Optional comma separated list of table names. By default all available tables are processed"` - Stations []string `short:"s" long:"station" delimiter:"," default:"" description:"Optional comma separated list of stations IDs. By default all station IDs are processed"` - Elements []string `short:"e" long:"elemcode" delimiter:"," default:"" description:"Optional comma separated list of element codes. By default all element codes are processed"` - Sep string `long:"sep" default:"," description:"Separator character in the dumped files. Needs to be quoted"` - HasHeader bool `long:"header" description:"Add this flag if the dumped files have a header row"` - Skip string `long:"skip" choice:"data" choice:"flags" description:"Skip import of data or flags"` - Email []string `long:"email" delimiter:"," description:"Optional comma separated list of email addresses used to notify if the program crashed"` - Reindex bool `long:"reindex" description:"Drops PG indices before insertion. Might improve performance"` + Verbose bool `arg:"-v" help:"Increase verbosity level"` + Path string `arg:"-p" default:"./dumps/kdvh" help:"Location the dumped data will be stored in"` + BaseDir string `arg:"-p,--path" default:"./dumps/kdvh" help:"Location the dumped data will be stored in"` + Tables []string `arg:"-t" help:"Optional space separated list of table names"` + Stations []string `arg:"-s" help:"Optional space separated list of stations IDs"` + Elements []string `arg:"-e" help:"Optional space separated list of element codes"` + Sep string `default:"," help:"Separator character in the dumped files. Needs to be quoted"` + HasHeader bool `help:"Add this flag if the dumped files have a header row"` + // TODO: this isn't implemented in go-arg + // Skip string `choice:"data" choice:"flags" help:"Skip import of data or flags"` + Reindex bool `help:"Drop PG indices before insertion. Might improve performance"` } -func (config *Config) Execute([]string) error { +func (config *Config) Execute() { if len(config.Sep) > 1 { fmt.Printf("Error: '--sep' only accepts single-byte characters. 
Got %s", config.Sep) os.Exit(1) } slog.Info("Import started!") - kdvh := db.Init() + database := kdvh.Init() // Cache metadata from Stinfosys, KDVH, and local `product_offsets.csv` - cache := cache.CacheMetadata(config.Tables, config.Stations, config.Elements, kdvh) + cache := cache.CacheMetadata(config.Tables, config.Stations, config.Elements, database) // Create connection pool for LARD - pool, err := pgxpool.New(context.TODO(), os.Getenv("LARD_STRING")) + pool, err := pgxpool.New(context.TODO(), os.Getenv(lard.LARD_ENV_VAR)) if err != nil { slog.Error(fmt.Sprint("Could not connect to Lard:", err)) - return err + return } defer pool.Close() if config.Reindex { - dropIndices(pool) + utils.DropIndices(pool) } // Recreate indices even in case the main function panics defer func() { r := recover() if config.Reindex { - createIndices(pool) + utils.CreateIndices(pool) } if r != nil { @@ -64,7 +66,7 @@ func (config *Config) Execute([]string) error { } }() - for _, table := range kdvh.Tables { + for _, table := range database.Tables { if len(config.Tables) > 0 && !slices.Contains(config.Tables, table.TableName) { continue } @@ -82,36 +84,4 @@ func (config *Config) Execute([]string) error { log.SetOutput(os.Stdout) slog.Info("Import complete!") - return nil -} - -func dropIndices(pool *pgxpool.Pool) { - slog.Info("Dropping table indices...") - - file, err := os.ReadFile("../db/drop_indices.sql") - if err != nil { - panic(err.Error()) - } - - _, err = pool.Exec(context.Background(), string(file)) - if err != nil { - panic(err.Error()) - } -} - -func createIndices(pool *pgxpool.Pool) { - slog.Info("Recreating table indices...") - - files := []string{"../db/public.sql", "../db/flags.sql"} - for _, filename := range files { - file, err := os.ReadFile(filename) - if err != nil { - panic(err.Error()) - } - - _, err = pool.Exec(context.Background(), string(file)) - if err != nil { - panic(err.Error()) - } - } } diff --git a/migrations/kdvh/list/main.go b/migrations/kdvh/list/main.go index 579d620f..4774f55c 100644 --- a/migrations/kdvh/list/main.go +++ b/migrations/kdvh/list/main.go @@ -9,7 +9,7 @@ import ( type Config struct{} -func (config *Config) Execute(_ []string) error { +func (config *Config) Execute() { fmt.Println("Available tables in KDVH:") kdvh := db.Init() @@ -23,6 +23,4 @@ func (config *Config) Execute(_ []string) error { for _, table := range tables { fmt.Println(" -", table) } - - return nil } diff --git a/migrations/kdvh/main.go b/migrations/kdvh/main.go index 2ad6c06f..7cb41c17 100644 --- a/migrations/kdvh/main.go +++ b/migrations/kdvh/main.go @@ -1,6 +1,11 @@ package kdvh import ( + "fmt" + "os" + + "github.com/alexflint/go-arg" + "migrate/kdvh/dump" port "migrate/kdvh/import" "migrate/kdvh/list" @@ -8,7 +13,22 @@ import ( // Command line arguments for KDVH migrations type Cmd struct { - Dump dump.DumpConfig `command:"dump" description:"Dump tables from KDVH to CSV"` - Import port.Config `command:"import" description:"Import CSV file dumped from KDVH"` - List list.Config `command:"list" description:"List available KDVH tables"` + Dump *dump.Config `arg:"subcommand" help:"Dump tables from KDVH to CSV"` + Import *port.Config `arg:"subcommand" help:"Import CSV file dumped from KDVH"` + List *list.Config `arg:"subcommand" help:"List available KDVH tables"` +} + +func (c *Cmd) Execute(parser *arg.Parser) { + switch { + case c.Dump != nil: + c.Dump.Execute() + case c.Import != nil: + c.Import.Execute() + case c.List != nil: + c.List.Execute() + default: + fmt.Println("Error: passing a 
subcommand is required.") + fmt.Println() + parser.WriteHelpForSubcommand(os.Stdout, "kdvh") + } } diff --git a/migrations/kvalobs/check/main.go b/migrations/kvalobs/check/main.go new file mode 100644 index 00000000..244ded91 --- /dev/null +++ b/migrations/kvalobs/check/main.go @@ -0,0 +1,104 @@ +package check + +import ( + "errors" + "fmt" + "log" + "slices" + "strings" + + "migrate/kvalobs/db" + "migrate/stinfosys" +) + +type Config struct { + DataFilename string `arg:"positional" required:"true" help:"data label file"` + TextFilename string `arg:"positional" required:"true" help:"text label file"` +} + +func (c *Config) Execute() { + dataParamids, derr := loadParamids(c.DataFilename) + textParamids, terr := loadParamids(c.TextFilename) + if derr != nil || terr != nil { + fmt.Println(errors.Join(derr, terr)) + return + } + + fmt.Println("Checking if some param IDs are stored in both the `data` and `text_data` tables") + c.checkDataAndTextParamsOverlap(dataParamids, textParamids) + + fmt.Println("Checking if param IDs in `text_data` match non-scalar parameters in Stinfosys") + conn, ctx := stinfosys.Connect() + defer conn.Close(ctx) + stinfoParams := stinfosys.GetNonScalars(conn) + c.checkNonScalars(dataParamids, textParamids, stinfoParams) +} + +// Simply checks if some params are found both in the data and text_data +func (c *Config) checkDataAndTextParamsOverlap(dataParamids, textParamids map[int32]int32) { + defer fmt.Println(strings.Repeat("- ", 40)) + + ids := make([]int32, 0, len(textParamids)) + for id := range dataParamids { + if _, ok := textParamids[id]; ok { + ids = append(ids, id) + } + } + + slices.Sort(ids) + for _, id := range ids { + fmt.Printf("ParamID %5d exists in both data and text tables\n", id) + } +} + +func loadParamids(path string) (map[int32]int32, error) { + labels, err := db.ReadLabelCSV(path) + if err != nil { + log.Println(err) + return nil, err + } + paramids := uniqueParamids(labels) + return paramids, nil + +} + +// Creates hashset of paramids +func uniqueParamids(labels []*db.Label) map[int32]int32 { + paramids := make(map[int32]int32) + for _, label := range labels { + paramids[label.ParamID] += 1 + } + return paramids +} + +type StinfoPair struct { + ParamID int32 `db:"paramid"` + IsScalar bool `db:"scalar"` +} + +// Checks that text params in Kvalobs are considered non-scalar in Stinfosys +func (c *Config) checkNonScalars(dataParamids, textParamids map[int32]int32, nonscalars []int32) { + defer fmt.Println(strings.Repeat("- ", 40)) + + for _, id := range nonscalars { + if _, ok := textParamids[id]; ok { + fmt.Printf("MATCH: ParamID %5d is text in both Stinfosys and Kvalobs\n", id) + delete(textParamids, id) + } else if _, ok := dataParamids[id]; ok { + fmt.Printf(" FAIL: ParamID %5d is text in Stinfosys, but not in Kvalobs\n", id) + } else { + fmt.Printf(" WARN: ParamID %5d not found in Kvalobs\n", id) + } + } + + idsLeft := make([]int32, 0, len(textParamids)) + for id := range textParamids { + idsLeft = append(idsLeft, id) + } + + slices.Sort(idsLeft) + for _, id := range idsLeft { + fmt.Printf(" FAIL: ParamID %5d is text in Kvalobs, but not in Stinfosys\n", id) + } + +} diff --git a/migrations/kvalobs/db/base_config.go b/migrations/kvalobs/db/base_config.go new file mode 100644 index 00000000..544ca68f --- /dev/null +++ b/migrations/kvalobs/db/base_config.go @@ -0,0 +1,37 @@ +package db + +import ( + "time" + + "migrate/utils" +) + +// TODO: should we use this one as default or process all times +// TODO: it looks like histkvalobs has data only 
starting from 2023-06-01? +var FROMTIME time.Time = time.Date(2006, 01, 01, 00, 00, 00, 00, time.UTC) + +type BaseConfig struct { + Path string `arg:"-p" default:"./dumps" help:"Location the dumped data will be stored in"` + FromTime *utils.Timestamp `arg:"--from" help:"Fetch data only starting from this date-only timestamp"` + ToTime *utils.Timestamp `arg:"--to" help:"Fetch data only until this date-only timestamp"` + Database string `arg:"--db" help:"Which database to process, all by default. Choices: ['kvalobs', 'histkvalobs']"` + Table string `help:"Which table to process, all by default. Choices: ['data', 'text_data']"` + Stations []int32 `help:"Optional space separated list of station numbers"` + TypeIds []int32 `help:"Optional space separated list of type IDs"` + ParamIds []int32 `help:"Optional space separated list of param IDs"` + Sensors []int32 `help:"Optional space separated list of sensors"` + Levels []int32 `help:"Optional space separated list of levels"` +} + +func (config *BaseConfig) ShouldProcessLabel(label *Label) bool { + return utils.IsEmptyOrContains(config.ParamIds, label.ParamID) && + // utils.IsEmptyOrContains(config.Stations, label.StationID) && + utils.IsEmptyOrContains(config.TypeIds, label.TypeID) && + // TODO: these two should never be null anyway? + utils.IsEmptyOrContainsPtr(config.Sensors, label.Sensor) && + utils.IsEmptyOrContainsPtr(config.Levels, label.Level) +} + +func (config *BaseConfig) TimeSpan() *utils.TimeSpan { + return &utils.TimeSpan{From: config.FromTime.Inner(), To: config.ToTime.Inner()} +} diff --git a/migrations/kvalobs/db/config_test.go b/migrations/kvalobs/db/config_test.go new file mode 100644 index 00000000..555a7447 --- /dev/null +++ b/migrations/kvalobs/db/config_test.go @@ -0,0 +1,58 @@ +package db + +import ( + "testing" +) + +func TestShouldProcessLabel(t *testing.T) { + type TestCase struct { + tag string + label Label + config BaseConfig + expected bool + } + + cases := []TestCase{ + { + tag: "empty config", + label: Label{ParamID: 212}, + config: BaseConfig{}, + expected: true, + }, + { + tag: "label paramid in config paramids", + label: Label{ParamID: 212}, + config: BaseConfig{ParamIds: []int32{212}}, + expected: true, + }, + { + tag: "label paramid NOT in config paramids", + label: Label{ParamID: 212}, + config: BaseConfig{ParamIds: []int32{300}}, + expected: false, + }, + { + tag: "label level NOT in config level", + label: Label{}, + config: BaseConfig{Levels: []int32{2}}, + expected: false, + }, + { + tag: "label level in config levels", + label: func() Label { + var level int32 = 2 + return Label{Level: &level} + }(), + config: BaseConfig{Levels: []int32{2}}, + expected: true, + }, + } + + for _, c := range cases { + t.Log(c.tag) + res := c.config.ShouldProcessLabel(&c.label) + if res != c.expected { + t.Fail() + } + } +} diff --git a/migrations/kvalobs/db/csv_parsers.go b/migrations/kvalobs/db/csv_parsers.go new file mode 100644 index 00000000..ada02d32 --- /dev/null +++ b/migrations/kvalobs/db/csv_parsers.go @@ -0,0 +1,188 @@ +package db + +import ( + "bufio" + "migrate/lard" + "migrate/utils" + "slices" + "strconv" + "strings" + "time" +) + +func parseDataCSV(tsid int32, rowCount int, timespan *utils.TimeSpan, scanner *bufio.Scanner) ([][]any, [][]any, error) { + data := make([][]any, 0, rowCount) + flags := make([][]any, 0, rowCount) + var originalPtr, correctedPtr *float32 + for scanner.Scan() { + // obstime, original, tbtime, corrected, controlinfo, useinfo, cfailed + // We don't parse tbtime + fields := 
strings.Split(scanner.Text(), ",") + + obstime, err := time.Parse(time.RFC3339, fields[0]) + if err != nil { + return nil, nil, err + } + + if timespan.From != nil && obstime.Sub(*timespan.From) < 0 { + continue + } + if timespan.To != nil && obstime.Sub(*timespan.To) > 0 { + break + } + + obsvalue64, err := strconv.ParseFloat(fields[1], 32) + if err != nil { + return nil, nil, err + } + + corrected64, err := strconv.ParseFloat(fields[1], 32) + if err != nil { + return nil, nil, err + } + + original := float32(obsvalue64) + corrected := float32(corrected64) + + // Filter out special values that in Kvalobs stand for null observations + if !slices.Contains(NULL_VALUES, original) { + originalPtr = &original + } + if !slices.Contains(NULL_VALUES, corrected) { + correctedPtr = &corrected + } + + // Original value is inserted in main data table + lardObs := lard.DataObs{ + Id: tsid, + Obstime: obstime, + Data: originalPtr, + } + + var cfailed *string + if fields[6] != "" { + cfailed = &fields[6] + } + + flag := lard.Flag{ + Id: tsid, + Obstime: obstime, + Original: originalPtr, + Corrected: correctedPtr, + Controlinfo: &fields[4], // Never null, has default value in Kvalobs + Useinfo: &fields[5], // Never null, has default value in Kvalobs + Cfailed: cfailed, + } + + data = append(data, lardObs.ToRow()) + flags = append(flags, flag.ToRow()) + } + + return data, flags, nil +} + +// Text obs are not flagged +func parseTextCSV(tsid int32, rowCount int, timespan *utils.TimeSpan, scanner *bufio.Scanner) ([][]any, error) { + data := make([][]any, 0, rowCount) + for scanner.Scan() { + // obstime, original, tbtime + fields := strings.Split(scanner.Text(), ",") + + obstime, err := time.Parse(time.RFC3339, fields[0]) + if err != nil { + return nil, err + } + + if timespan.From != nil && obstime.Sub(*timespan.From) < 0 { + continue + } + if timespan.To != nil && obstime.Sub(*timespan.To) > 0 { + break + } + + lardObs := lard.TextObs{ + Id: tsid, + Obstime: obstime, + Text: &fields[1], + } + + data = append(data, lardObs.ToRow()) + } + + return data, nil +} + +// Function for paramids 2751, 2752, 2753, 2754 that were stored as text data +// but should instead be treated as scalars +// TODO: I'm not sure these params should be scalars given that the other cloud types are not. +// Should all cloud types be integers or text? +func parseMetarCloudType(tsid int32, rowCount int, timespan *utils.TimeSpan, scanner *bufio.Scanner) ([][]any, error) { + data := make([][]any, 0, rowCount) + for scanner.Scan() { + // obstime, original, tbtime + fields := strings.Split(scanner.Text(), ",") + + obstime, err := time.Parse(time.RFC3339, fields[0]) + if err != nil { + return nil, err + } + + if timespan.From != nil && obstime.Sub(*timespan.From) < 0 { + continue + } + if timespan.To != nil && obstime.Sub(*timespan.To) > 0 { + break + } + + val, err := strconv.ParseFloat(fields[1], 32) + if err != nil { + return nil, err + } + + original := float32(val) + lardObs := lard.DataObs{ + Id: tsid, + Obstime: obstime, + Data: &original, + } + + data = append(data, lardObs.ToRow()) + } + + // TODO: Original text obs were not flagged, so we don't return a flags? + // Or should we return default values? 
+ return data, nil +} + +// Function for paramids 305, 306, 307, 308 that were stored as scalar data +// but should be treated as text +func parseSpecialCloudType(tsid int32, rowCount int, timespan *utils.TimeSpan, scanner *bufio.Scanner) ([][]any, error) { + data := make([][]any, 0, rowCount) + for scanner.Scan() { + // obstime, original, tbtime, corrected, controlinfo, useinfo, cfailed + // TODO: should parse everything and return the flags? + fields := strings.Split(scanner.Text(), ",") + + obstime, err := time.Parse(time.RFC3339, fields[0]) + if err != nil { + return nil, err + } + + if timespan.From != nil && obstime.Sub(*timespan.From) < 0 { + continue + } + if timespan.To != nil && obstime.Sub(*timespan.To) > 0 { + break + } + + lardObs := lard.TextObs{ + Id: tsid, + Obstime: obstime, + Text: &fields[1], + } + + data = append(data, lardObs.ToRow()) + } + + return data, nil +} diff --git a/migrations/kvalobs/db/import_functions.go b/migrations/kvalobs/db/import_functions.go new file mode 100644 index 00000000..5b5a8327 --- /dev/null +++ b/migrations/kvalobs/db/import_functions.go @@ -0,0 +1,115 @@ +package db + +import ( + "bufio" + "log/slog" + "migrate/lard" + "migrate/utils" + "os" + "strconv" + + "github.com/jackc/pgx/v5/pgxpool" +) + +// NOTE: +// - for both kvalobs and histkvalobs: +// - all stinfo non-scalar params that can be found in Kvalobs are stored in `text_data` +// - 305, 306, 307, 308 are also in `data` but should be treated as `text_data` +// => should always use readDataCSV and lard.InsertData for these +// - only for histkvalobs +// - 2751, 2752, 2753, 2754 are in `text_data` but should be treated as `data`? + +func importData(tsid int32, label *Label, filename, logStr string, timespan *utils.TimeSpan, pool *pgxpool.Pool) (int64, error) { + file, err := os.Open(filename) + if err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + + // Parse number of rows + scanner.Scan() + rowCount, _ := strconv.Atoi(scanner.Text()) + + // Skip header + scanner.Scan() + + if label.IsSpecialCloudType() { + text, err := parseSpecialCloudType(tsid, rowCount, timespan, scanner) + if err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + + count, err := lard.InsertTextData(text, pool, logStr) + if err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + + return count, nil + } + + data, flags, err := parseDataCSV(tsid, rowCount, timespan, scanner) + count, err := lard.InsertData(data, pool, logStr) + if err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + + if err := lard.InsertFlags(flags, pool, logStr); err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + + return count, nil +} + +func importText(tsid int32, label *Label, filename, logStr string, timespan *utils.TimeSpan, pool *pgxpool.Pool) (int64, error) { + file, err := os.Open(filename) + if err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + + // Parse number of rows + scanner.Scan() + rowCount, _ := strconv.Atoi(scanner.Text()) + + // Skip header + scanner.Scan() + + if label.IsMetarCloudType() { + data, err := parseMetarCloudType(tsid, rowCount, timespan, scanner) + if err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + count, err := lard.InsertData(data, pool, logStr) + if err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + + return count, nil + } + + text, err := 
parseTextCSV(tsid, rowCount, timespan, scanner) + if err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + + count, err := lard.InsertTextData(text, pool, logStr) + if err != nil { + slog.Error(logStr + err.Error()) + return 0, err + } + + return count, nil +} diff --git a/migrations/kvalobs/db/label.go b/migrations/kvalobs/db/label.go new file mode 100644 index 00000000..0db0173c --- /dev/null +++ b/migrations/kvalobs/db/label.go @@ -0,0 +1,139 @@ +package db + +import ( + "errors" + "fmt" + "log/slog" + "migrate/lard" + "migrate/utils" + "os" + "slices" + "strconv" + "strings" + + "github.com/gocarina/gocsv" +) + +var METAR_CLOUD_TYPES []int32 = []int32{2751, 2752, 2753, 2754} +var SPECIAL_CLOUD_TYPES []int32 = []int32{305, 306, 307, 308} + +// Kvalobs specific label +type Label struct { + StationID int32 `db:"stationid"` + ParamID int32 `db:"paramid"` + TypeID int32 `db:"typeid"` + // These two are not present in the `text_data` tabl + Sensor *int32 `db:"sensor"` // bpchar(1) in `data` table + Level *int32 `db:"level"` + // LogStr string +} + +func (l *Label) IsMetarCloudType() bool { + return slices.Contains(METAR_CLOUD_TYPES, l.ParamID) +} + +func (l *Label) IsSpecialCloudType() bool { + return slices.Contains(SPECIAL_CLOUD_TYPES, l.ParamID) +} + +func (l *Label) sensorLevelString() (string, string) { + var sensor, level string + if l.Sensor != nil { + sensor = fmt.Sprint(*l.Sensor) + } + if l.Level != nil { + level = fmt.Sprint(*l.Level) + } + return sensor, level +} + +func (l *Label) ToFilename() string { + sensor, level := l.sensorLevelString() + return fmt.Sprintf("%v_%v_%v_%v_%v.csv", l.StationID, l.ParamID, l.TypeID, sensor, level) +} + +func (l *Label) LogStr() string { + sensor, level := l.sensorLevelString() + return fmt.Sprintf( + "[%v - %v - %v - %v - %v]: ", + l.StationID, l.ParamID, l.TypeID, sensor, level, + ) +} + +func (l *Label) ToLard() *lard.Label { + label := lard.Label(*l) + return &label +} + +func ReadLabelCSV(path string) (labels []*Label, err error) { + file, err := os.Open(path) + if err != nil { + slog.Error(err.Error()) + return nil, err + } + defer file.Close() + + slog.Info("Reading previously dumped labels from " + path) + err = gocsv.Unmarshal(file, &labels) + if err != nil { + slog.Error(err.Error()) + } + return labels, err +} + +func WriteLabelCSV(path string, labels []*Label) error { + file, err := os.Create(path) + if err != nil { + slog.Error(err.Error()) + return err + } + + slog.Info("Writing timeseries labels to " + path) + err = gocsv.Marshal(labels, file) + if err != nil { + slog.Error(err.Error()) + } else { + slog.Info(fmt.Sprintf("Dumped %d labels!", len(labels))) + } + return err +} + +func parseFilenameFields(s *string) (*int32, error) { + if *s == "" { + return nil, nil + } + res, err := strconv.ParseInt(*s, 10, 32) + if err != nil { + return nil, err + } + out := int32(res) + return &out, nil +} + +// Deserialize filename to LardLabel +func LabelFromFilename(filename string) (*Label, error) { + name := strings.TrimSuffix(filename, ".csv") + + fields := strings.Split(name, "_") + if len(fields) != 5 { + return nil, errors.New("Wrong number of fields in file name: " + filename) + } + + ptrs := make([]*string, len(fields)) + for i := range ptrs { + ptrs[i] = &fields[i] + } + + converted, err := utils.TryMap(ptrs, parseFilenameFields) + if err != nil { + return nil, err + } + + return &Label{ + StationID: *converted[0], + ParamID: *converted[1], + TypeID: *converted[2], + Sensor: converted[3], + Level: converted[4], + }, 
nil +} diff --git a/migrations/kvalobs/db/label_dump_functions.go b/migrations/kvalobs/db/label_dump_functions.go new file mode 100644 index 00000000..22e18dd2 --- /dev/null +++ b/migrations/kvalobs/db/label_dump_functions.go @@ -0,0 +1,165 @@ +package db + +import ( + "context" + "log/slog" + "migrate/utils" + "slices" + "sync" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +const OBSDATA_QUERY string = `SELECT DISTINCT paramid, sensor::int, level FROM obsdata +JOIN observations USING(observationid) +WHERE stationid = $1 + AND typeid = $2 + AND ($3::timestamp IS NULL OR obstime >= $3) + AND ($4::timestamp IS NULL OR obstime < $4)` + +const OBSTEXTDATA_QUERY string = `SELECT DISTINCT paramid FROM obstextdata +JOIN observations USING(observationid) +WHERE stationid = $1 + AND typeid = $2 + AND ($3::timestamp IS NULL OR obstime >= $3) + AND ($4::timestamp IS NULL OR obstime < $4)` + +type StationType struct { + stationid int32 + typeid int32 +} + +// Lazily initialized slice of distinct stationids and typeids from the `observations` table +var UNIQUE_STATIONS_TYPES []*StationType = nil + +func initUniqueStationsAndTypeIds(timespan *utils.TimeSpan, pool *pgxpool.Pool) error { + if UNIQUE_STATIONS_TYPES != nil { + return nil + } + + rows, err := pool.Query(context.TODO(), + `SELECT DISTINCT stationid, typeid FROM observations + WHERE ($1::timestamp IS NULL OR obstime >= $1) + AND ($2::timestamp IS NULL OR obstime < $2) + ORDER BY stationid`, + timespan.From, timespan.To) + if err != nil { + return err + } + + UNIQUE_STATIONS_TYPES = make([]*StationType, 0, rows.CommandTag().RowsAffected()) + UNIQUE_STATIONS_TYPES, err = pgx.AppendRows(UNIQUE_STATIONS_TYPES, rows, func(row pgx.CollectableRow) (*StationType, error) { + var label StationType + err := row.Scan(&label.stationid, &label.typeid) + return &label, err + }) + + if err != nil { + return err + } + return nil +} + +func dumpDataLabels(timespan *utils.TimeSpan, pool *pgxpool.Pool, maxConn int) ([]*Label, error) { + // First query stationid and typeid from observations + // Then query paramid, sensor, level from obsdata + // This is faster than querying all of them together from data + slog.Info("Querying data labels...") + if err := initUniqueStationsAndTypeIds(timespan, pool); err != nil { + slog.Error(err.Error()) + return nil, err + } + + bar := utils.NewBar(len(UNIQUE_STATIONS_TYPES), "Stations") + var labels []*Label + var wg sync.WaitGroup + + semaphore := make(chan struct{}, maxConn) + for _, s := range UNIQUE_STATIONS_TYPES { + wg.Add(1) + semaphore <- struct{}{} + + go func() { + defer func() { + bar.Add(1) + wg.Done() + <-semaphore + }() + + rows, err := pool.Query(context.TODO(), OBSDATA_QUERY, s.stationid, s.typeid, timespan.From, timespan.To) + if err != nil { + slog.Error(err.Error()) + return + } + + innerLabels := make([]*Label, 0, rows.CommandTag().RowsAffected()) + innerLabels, err = pgx.AppendRows(innerLabels, rows, func(row pgx.CollectableRow) (*Label, error) { + label := Label{StationID: s.stationid, TypeID: s.typeid} + err := row.Scan(&label.ParamID, &label.Sensor, &label.Level) + return &label, err + }) + + if err != nil { + slog.Error(err.Error()) + return + } + + labels = slices.Concat(labels, innerLabels) + }() + } + + wg.Wait() + + return labels, nil +} + +func dumpTextLabels(timespan *utils.TimeSpan, pool *pgxpool.Pool, maxConn int) ([]*Label, error) { + // First query stationid and typeid from observations + // Then query paramid from obstextdata + // This is faster than querying all of 
them together from data + slog.Info("Querying text labels...") + if err := initUniqueStationsAndTypeIds(timespan, pool); err != nil { + slog.Error(err.Error()) + return nil, err + } + + bar := utils.NewBar(len(UNIQUE_STATIONS_TYPES), "Stations") + var labels []*Label + var wg sync.WaitGroup + + semaphore := make(chan struct{}, maxConn) + for _, s := range UNIQUE_STATIONS_TYPES { + wg.Add(1) + semaphore <- struct{}{} + + go func() { + defer func() { + bar.Add(1) + wg.Done() + <-semaphore + }() + + rows, err := pool.Query(context.TODO(), OBSTEXTDATA_QUERY, s.stationid, s.typeid, timespan.From, timespan.To) + if err != nil { + slog.Error(err.Error()) + return + } + + innerLabels := make([]*Label, 0, rows.CommandTag().RowsAffected()) + innerLabels, err = pgx.AppendRows(innerLabels, rows, func(row pgx.CollectableRow) (*Label, error) { + label := Label{StationID: s.stationid, TypeID: s.typeid} + err := row.Scan(&label.ParamID) + return &label, err + }) + + if err != nil { + slog.Error(err.Error()) + return + } + labels = slices.Concat(labels, innerLabels) + }() + } + wg.Wait() + return labels, nil +} diff --git a/migrations/kvalobs/db/main.go b/migrations/kvalobs/db/main.go new file mode 100644 index 00000000..fcf24b00 --- /dev/null +++ b/migrations/kvalobs/db/main.go @@ -0,0 +1,148 @@ +package db + +import ( + "time" +) + +// Kvalobs is composed of two databases +// 1) `kvalobs` for fresh data? +// 2) `histkvalobs` for data older than +// +// Both contain the same tables: +// - `algorithms`: stores procedure code (!!!) for QC checks +// - `checks`: stores tags and signatures of QC tests +// - `data`: a view that joins `observations` and `obsvalue` +// +// Column | Type | Collation | Nullable | Default +// -------------+-----------------------------+-----------+----------+---------------------------- +// stationid | integer | | not null | +// obstime | timestamp without time zone | | not null | +// original | double precision | | not null | +// paramid | integer | | not null | +// tbtime | timestamp without time zone | | not null | +// typeid | integer | | not null | +// sensor | character(1) | | | '0'::bpchar +// level | integer | | | 0 +// corrected | double precision | | not null | +// controlinfo | character(16) | | | '0000000000000000'::bpchar +// useinfo | character(16) | | | '0000000000000000'::bpchar +// cfailed | text | | | +// +// - `data_history`: stores the history of QC pipelines for data observations +// +// - `default_missing`: +// - `default_missing_values`: default values for some paramids (-32767) +// - `model`: stores model names +// - `model_data`: stores model data for different stations, paramids, etc. 
+// +// - `observations`: stores sequential observation IDs for each observations (note the lack of paramid) +// Column | Type | Collation | Nullable | +// ---------------+-----------------------------+-----------+----------+ +// observationid | bigint | | not null | +// stationid | integer | | not null | +// typeid | integer | | not null | +// obstime | timestamp without time zone | | not null | +// tbtime | timestamp without time zone | | not null | +// +// - `obsdata`: where the actual scalar data is stored +// Column | Type | Collation | Nullable | Default +// ---------------+------------------+-----------+----------+---------------------------- +// observationid | bigint | | | +// original | double precision | | not null | +// paramid | integer | | not null | +// sensor | character(1) | | | '0'::bpchar +// level | integer | | | 0 +// corrected | double precision | | not null | +// controlinfo | character(16) | | | '0000000000000000'::bpchar +// useinfo | character(16) | | | '0000000000000000'::bpchar +// cfailed | text | | | +// +// - `obstextdata`: where the actual text data is stored +// Column | Type | Collation | Nullable | Default | +// ---------------+---------+-----------+----------+---------+ +// observationid | bigint | | | | +// original | text | | not null | | +// paramid | integer | | not null | | +// +// - `param`: part of stinfosys `param` table +// Column | Type | Collation | Nullable | Default +// -------------+---------+-----------+----------+--------- +// paramid | integer | | not null | +// name | text | | not null | +// description | text | | | +// unit | text | | | +// level_scale | integer | | | 0 +// comment | text | | | +// scalar | boolean | | | true +// +// - `pdata`: view similar to `data` but with paramid converted to param code +// - `station`: station metadata such as (lat, lon, height, name, wmonr, etc) +// - `station_metadata`: Stores fromtime and totime for `stationid` and optionally `paramid`. +// `typeid`, `sensor`, and `level` are always NULL. +// +// - `text_data`: view that joins `observations` and `obstextdata` +// +// Column | Type | Collation | Nullable | Default +// -----------+-----------------------------+-----------+----------+--------- +// stationid | integer | | not null | +// obstime | timestamp without time zone | | not null | +// original | text | | not null | +// paramid | integer | | not null | +// tbtime | timestamp without time zone | | not null | +// typeid | integer | | not null | +// +// - `text_data_history`: stores the history of QC pipelines for text observations () +// +// IMPORTANT: considerations for migrations to LARD +// - LARD stores Timeseries labels (stationid, paramid, typeid, sensor, level) in a separate table +// - In LARD (sensor, level) can both be NULL, while in Kvalobs they have default values ('0',0) +// => POSSIBLE INCONSISTENCY when importing to LARD +// - Timestamps in Kvalobs are UTC +// - Kvalobs doesn't have the concept of timeseries ID, +// instead there is a sequential ID associated with each observation row + +// Special values that are treated as NULL in Kvalobs +// TODO: are there more values we should be looking for? 
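// As a rough sketch of the (sensor, level) consideration noted above: one way an
// importer could avoid creating duplicate LARD labels for the same timeseries
// would be to collapse the Kvalobs defaults ('0', 0) to NULL before looking up or
// inserting a label. This is only an illustration of the inconsistency, not
// something the migration currently does; `normalizeSensorLevel` is a
// hypothetical helper.
//
//	func normalizeSensorLevel(sensor, level *int32) (*int32, *int32) {
//		if sensor != nil && *sensor == 0 {
//			sensor = nil // '0' is the Kvalobs default, treat as "not set"
//		}
//		if level != nil && *level == 0 {
//			level = nil // 0 is the Kvalobs default, treat as "not set"
//		}
//		return sensor, level
//	}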
+var NULL_VALUES []float32 = []float32{-32767, -32766} + +type DataSeries = []*DataObs + +// Kvalobs data table observation row +type DataObs struct { + Obstime time.Time `db:"obstime"` + Original float64 `db:"original"` + Tbtime time.Time `db:"tbtime"` + Corrected float64 `db:"corrected"` + Controlinfo *string `db:"controlinfo"` + Useinfo *string `db:"useinfo"` + Cfailed *string `db:"cfailed"` +} + +type TextSeries = []*TextObs + +// Kvalobs text_data table observation row +type TextObs struct { + Obstime time.Time `db:"obstime"` + Original string `db:"original"` + Tbtime time.Time `db:"tbtime"` +} + +// Basic Metadata for a Kvalobs database +type DB struct { + Name string + ConnEnvVar string + Tables map[string]*Table +} + +// Returns two `DB` structs with metadata for the prod and hist databases +func InitDBs() map[string]DB { + tables := map[string]*Table{ + "data": {Name: "data", DumpLabels: dumpDataLabels, DumpSeries: dumpDataSeries, Import: importData}, + "text_data": {Name: "text_data", DumpLabels: dumpTextLabels, DumpSeries: dumpTextSeries, Import: importText}, + } + + return map[string]DB{ + "kvalobs": {Name: "kvalobs", ConnEnvVar: "KVALOBS_CONN_STRING", Tables: tables}, + "histkvalobs": {Name: "histkvalobs", ConnEnvVar: "HISTKVALOBS_CONN_STRING", Tables: tables}, + } +} diff --git a/migrations/kvalobs/db/series_dump_functions.go b/migrations/kvalobs/db/series_dump_functions.go new file mode 100644 index 00000000..538db49d --- /dev/null +++ b/migrations/kvalobs/db/series_dump_functions.go @@ -0,0 +1,105 @@ +package db + +import ( + "context" + "fmt" + "log/slog" + "migrate/utils" + "os" + "path/filepath" + + "github.com/gocarina/gocsv" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +func dumpDataSeries(label *Label, timespan *utils.TimeSpan, path string, pool *pgxpool.Pool) error { + // NOTE: sensor and level could be NULL, but in reality they have default values + query := `SELECT obstime, original, tbtime, corrected, controlinfo, useinfo, cfailed + FROM data + WHERE stationid = $1 + AND typeid = $2 + AND paramid = $3 + AND sensor = $4 + AND level = $5 + AND ($6::timestamp IS NULL OR obstime >= $6) + AND ($7::timestamp IS NULL OR obstime < $7) + ORDER BY obstime` + + // Convert to string because `sensor` in Kvalobs is a BPCHAR(1) + var sensor *string + if label.Sensor != nil { + sensorval := fmt.Sprint(*label.Sensor) + sensor = &sensorval + } + + rows, err := pool.Query( + context.TODO(), + query, + label.StationID, + label.TypeID, + label.ParamID, + sensor, + label.Level, + timespan.From, + timespan.To, + ) + if err != nil { + return err + } + + data, err := pgx.CollectRows(rows, pgx.RowToAddrOfStructByName[DataObs]) + if err != nil { + return err + } + + return writeSeriesCSV(data, path, label) +} + +func dumpTextSeries(label *Label, timespan *utils.TimeSpan, path string, pool *pgxpool.Pool) error { + query := `SELECT obstime, original, tbtime FROM text_data + WHERE stationid = $1 + AND typeid = $2 + AND paramid = $3 + AND ($4::timestamp IS NULL OR obstime >= $4) + AND ($5::timestamp IS NULL OR obstime < $5) + ORDER BY obstime` + + rows, err := pool.Query( + context.TODO(), + query, + label.StationID, + label.TypeID, + label.ParamID, + timespan.From, + timespan.To, + ) + if err != nil { + return err + } + + data, err := pgx.CollectRows(rows, pgx.RowToAddrOfStructByName[TextObs]) + if err != nil { + return err + } + + return writeSeriesCSV(data, path, label) +} + +func writeSeriesCSV[S DataSeries | TextSeries](series S, path string, label *Label) error { + 
filename := filepath.Join(path, label.ToFilename()) + file, err := os.Create(filename) + if err != nil { + slog.Error(err.Error()) + return err + } + + // Write number of lines on first line, keep headers on 2nd line + file.Write([]byte(fmt.Sprintf("%v\n", len(series)))) + if err = gocsv.Marshal(series, file); err != nil { + slog.Error(err.Error()) + return err + } + + return nil +} diff --git a/migrations/kvalobs/db/table.go b/migrations/kvalobs/db/table.go new file mode 100644 index 00000000..942552bb --- /dev/null +++ b/migrations/kvalobs/db/table.go @@ -0,0 +1,25 @@ +package db + +import ( + "migrate/utils" + + "github.com/jackc/pgx/v5/pgxpool" +) + +// Maps to `data` and `text_data` tables in Kvalobs +type Table struct { + Name string + Path string // Path of the dumped table + DumpLabels LabelDumpFunc // Function that dumps labels from the table + DumpSeries ObsDumpFunc // Function that dumps observations from the table + Import ImportFunc // Function that parses dumps and ingests observations into LARD +} + +// Function used to query labels from kvalobs given an optional timespan +type LabelDumpFunc func(timespan *utils.TimeSpan, pool *pgxpool.Pool, maxConn int) ([]*Label, error) + +// Function used to query timeseries from kvalobs for a specific label and dump them inside path +type ObsDumpFunc func(label *Label, timespan *utils.TimeSpan, path string, pool *pgxpool.Pool) error + +// Lard Import function +type ImportFunc func(tsid int32, label *Label, filename, logStr string, timespan *utils.TimeSpan, pool *pgxpool.Pool) (int64, error) diff --git a/migrations/kvalobs/dump/dump.go b/migrations/kvalobs/dump/dump.go new file mode 100644 index 00000000..6a53a995 --- /dev/null +++ b/migrations/kvalobs/dump/dump.go @@ -0,0 +1,128 @@ +package dump + +import ( + "context" + "fmt" + "log/slog" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/jackc/pgx/v5/pgxpool" + + kvalobs "migrate/kvalobs/db" + "migrate/utils" +) + +func getLabels(table *kvalobs.Table, pool *pgxpool.Pool, timespan *utils.TimeSpan, config *Config) (labels []*kvalobs.Label, err error) { + labelFile := fmt.Sprintf("%s_labels_%s.csv", table.Path, timespan.ToString()) + + if _, err := os.Stat(labelFile); err != nil || config.UpdateLabels { + labels, err = table.DumpLabels(timespan, pool, config.MaxConn) + if err != nil { + return nil, err + } + return labels, kvalobs.WriteLabelCSV(labelFile, labels) + } + return kvalobs.ReadLabelCSV(labelFile) +} + +func getStationLabelMap(labels []*kvalobs.Label) map[int32][]*kvalobs.Label { + labelmap := make(map[int32][]*kvalobs.Label) + + for _, label := range labels { + labelmap[label.StationID] = append(labelmap[label.StationID], label) + } + + return labelmap +} + +func dumpTable(table *kvalobs.Table, pool *pgxpool.Pool, config *Config) { + if !config.LabelsOnly { + utils.SetLogFile(table.Path, "dump") + } + fmt.Printf("Dumping to %q...\n", table.Path) + defer fmt.Println(strings.Repeat("- ", 40)) + + timespan := config.TimeSpan() + labels, err := getLabels(table, pool, timespan, config) + if err != nil || config.LabelsOnly { + return + } + + stationMap := getStationLabelMap(labels) + + // Used to limit connections to the database + semaphore := make(chan struct{}, config.MaxConn) + var wg sync.WaitGroup + + for station, labels := range stationMap { + stationPath := filepath.Join(table.Path, fmt.Sprint(station)) + + if !utils.IsEmptyOrContains(config.Stations, station) { + continue + } + + if err := os.MkdirAll(stationPath, os.ModePerm); err != nil { + 
slog.Error(err.Error()) + return + } + + // TODO: this bar is a bit deceiving if you don't dump all the labels + // Maybe should only cache the ones requested from cli? + bar := utils.NewBar(len(labels), fmt.Sprintf("%10d", station)) + bar.RenderBlank() + + for _, label := range labels { + wg.Add(1) + semaphore <- struct{}{} + + go func() { + defer func() { + bar.Add(1) + wg.Done() + // Release semaphore + <-semaphore + }() + + if !config.ShouldProcessLabel(label) { + return + } + + logStr := label.LogStr() + if err := table.DumpSeries(label, timespan, stationPath, pool); err != nil { + slog.Info(logStr + err.Error()) + return + } + + slog.Info(logStr + "dumped successfully") + }() + } + wg.Wait() + } +} + +func dumpDB(database kvalobs.DB, config *Config) { + pool, err := pgxpool.New(context.Background(), os.Getenv(database.ConnEnvVar)) + if err != nil { + slog.Error(fmt.Sprint("Could not connect to Kvalobs:", err)) + return + } + defer pool.Close() + + path := filepath.Join(config.Path, database.Name) + if err := os.MkdirAll(path, os.ModePerm); err != nil { + slog.Error(err.Error()) + return + } + + for name, table := range database.Tables { + if !utils.IsEmptyOrEqual(config.Table, name) { + continue + } + + table.Path = filepath.Join(path, table.Name) + dumpTable(table, pool, config) + } +} diff --git a/migrations/kvalobs/dump/main.go b/migrations/kvalobs/dump/main.go new file mode 100644 index 00000000..bb7041ab --- /dev/null +++ b/migrations/kvalobs/dump/main.go @@ -0,0 +1,27 @@ +package dump + +import ( + "migrate/kvalobs/db" + "migrate/utils" +) + +// TODO: there were some comments in the original script about +// the fact that the same timeseries could be in both +// 'data' and 'text_data' + +type Config struct { + db.BaseConfig + LabelsOnly bool `arg:"--labels-only" help:"Only dump labels"` + UpdateLabels bool `arg:"--labels-update" help:"Overwrites the label CSV files"` + MaxConn int `arg:"-n" default:"4" help:"Max number of allowed concurrent connections to Kvalobs"` +} + +func (config *Config) Execute() { + dbs := db.InitDBs() + for name, db := range dbs { + if !utils.IsEmptyOrEqual(config.Database, name) { + continue + } + dumpDB(db, config) + } +} diff --git a/migrations/kvalobs/import/cache/main.go b/migrations/kvalobs/import/cache/main.go new file mode 100644 index 00000000..c5f4ebba --- /dev/null +++ b/migrations/kvalobs/import/cache/main.go @@ -0,0 +1,115 @@ +package cache + +import ( + "context" + "database/sql" + "log/slog" + "os" + "time" + + "github.com/jackc/pgx/v5" + + "migrate/kvalobs/db" + "migrate/stinfosys" + "migrate/utils" +) + +type KvalobsTimespanMap = map[MetaKey]utils.TimeSpan + +type Cache struct { + Meta KvalobsTimespanMap + Permits stinfosys.PermitMaps + // Params stinfosys.ScalarMap // Don't need them +} + +func New(kvalobs db.DB) *Cache { + conn, ctx := stinfosys.Connect() + defer conn.Close(ctx) + + permits := stinfosys.NewPermitTables(conn) + // timeseries := + + timespans := cacheKvalobsTimeseriesTimespans(kvalobs) + return &Cache{Permits: permits, Meta: timespans} +} + +func (c *Cache) GetSeriesTimespan(label *db.Label) (utils.TimeSpan, error) { + // First try to lookup timespan with both stationid and paramid + // TODO: should these timespans modify an existing timeseries in lard? 
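	// As an illustration of the lookup order implemented below (the station and
	// param numbers here are made up for the example):
	//
	//	span, err := cache.GetSeriesTimespan(&db.Label{StationID: 18700, ParamID: 211})
	//	// 1. try the station_metadata entry for (18700, 211)
	//	// 2. fall back to the station-wide entry (18700, NULL)
	//	// 3. otherwise return an empty TimeSpan (NULL fromtime/totime)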
+ key := MetaKey{Stationid: label.StationID, Paramid: sql.NullInt32{Int32: label.ParamID, Valid: true}} + if timespan, ok := c.Meta[key]; ok { + return timespan, nil + } + + // Otherwise try with stationid only + key.Paramid = sql.NullInt32{} + if timespan, ok := c.Meta[key]; ok { + return timespan, nil + } + + // If there is no timespan we insert null fromtime and totime + // TODO: is this really what we want to do? + // Is there another place where to find this information? + return utils.TimeSpan{}, nil +} + +func (c *Cache) TimeseriesIsOpen(stnr, typeid, paramid int32) bool { + return c.Permits.TimeseriesIsOpen(stnr, typeid, paramid) +} + +// In `station_metadata` only the stationid is required to be non-NULL +// Paramid can be optionally specified +// Typeid, sensor, and level column are all NULL, so they are not present in this struct +type MetaKey struct { + Stationid int32 + Paramid sql.NullInt32 +} + +// Query kvalobs `station_metadata` table that stores timeseries timespans +func cacheKvalobsTimeseriesTimespans(kvalobs db.DB) KvalobsTimespanMap { + cache := make(KvalobsTimespanMap) + + slog.Info("Connecting to Kvalobs to cache metadata") + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + conn, err := pgx.Connect(ctx, os.Getenv(kvalobs.ConnEnvVar)) + if err != nil { + slog.Error("Could not connect to Kvalobs. Make sure to be connected to the VPN. " + err.Error()) + os.Exit(1) + } + defer conn.Close(ctx) + + query := `SELECT stationid, paramid, fromtime, totime FROM station_metadata` + + rows, err := conn.Query(context.TODO(), query) + if err != nil { + slog.Error(err.Error()) + os.Exit(1) + } + + for rows.Next() { + var key MetaKey + var timespan utils.TimeSpan + + err := rows.Scan( + &key.Stationid, + &key.Paramid, + ×pan.From, + ×pan.To, + ) + if err != nil { + slog.Error(err.Error()) + os.Exit(1) + } + + cache[key] = timespan + } + + if rows.Err() != nil { + slog.Error(rows.Err().Error()) + os.Exit(1) + } + + return cache +} diff --git a/migrations/kvalobs/import/import.go b/migrations/kvalobs/import/import.go new file mode 100644 index 00000000..785b2a75 --- /dev/null +++ b/migrations/kvalobs/import/import.go @@ -0,0 +1,123 @@ +package port + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "github.com/jackc/pgx/v5/pgxpool" + + kvalobs "migrate/kvalobs/db" + "migrate/kvalobs/import/cache" + "migrate/lard" + "migrate/utils" +) + +func ImportTable(table *kvalobs.Table, cache *cache.Cache, pool *pgxpool.Pool, config *Config) (int64, error) { + fmt.Printf("Importing from %q...\n", table.Path) + defer fmt.Println(strings.Repeat("- ", 40)) + + stations, err := os.ReadDir(table.Path) + if err != nil { + slog.Error(err.Error()) + return 0, err + } + + importTimespan := config.TimeSpan() + fmt.Printf("Number of stations to import: %d...\n", len(stations)) + var rowsInserted int64 + for _, station := range stations { + stnr, err := strconv.ParseInt(station.Name(), 10, 32) + if err != nil || !utils.IsEmptyOrContains(config.Stations, int32(stnr)) { + continue + } + + stationDir := filepath.Join(table.Path, station.Name()) + labels, err := os.ReadDir(stationDir) + if err != nil { + slog.Warn(err.Error()) + continue + } + + bar := utils.NewBar(len(labels), fmt.Sprintf("%10s", station.Name())) + bar.RenderBlank() + + var wg sync.WaitGroup + for _, file := range labels { + wg.Add(1) + go func() { + defer func() { + bar.Add(1) + wg.Done() + }() + + label, err := kvalobs.LabelFromFilename(file.Name()) + if 
err != nil { + slog.Error(err.Error()) + return + } + + if !config.ShouldProcessLabel(label) { + return + } + + logStr := label.LogStr() + // Check if data for this station/element is restricted + if !cache.TimeseriesIsOpen(label.StationID, label.TypeID, label.ParamID) { + // TODO: eventually use this to choose which table to use on insert + slog.Warn(logStr + "timeseries data is restricted, skipping") + return + } + + tsTimespan, err := cache.GetSeriesTimespan(label) + if err != nil { + slog.Error(logStr + err.Error()) + return + } + + // TODO: figure out where to get fromtime, kvalobs directly? Stinfosys? + tsid, err := lard.GetTimeseriesID(label.ToLard(), tsTimespan, pool) + if err != nil { + slog.Error(logStr + err.Error()) + return + } + + filename := filepath.Join(stationDir, file.Name()) + // TODO: it's probably better to dump in different directories + // instead of introducing runtime checks + count, err := table.Import(tsid, label, filename, logStr, importTimespan, pool) + if err != nil { + // Logged inside table.Import + return + } + + rowsInserted += count + }() + } + wg.Wait() + } + + outputStr := fmt.Sprintf("%v: %v total rows inserted", table.Path, rowsInserted) + slog.Info(outputStr) + fmt.Println(outputStr) + + return rowsInserted, nil +} + +func ImportDB(database kvalobs.DB, cache *cache.Cache, pool *pgxpool.Pool, config *Config) { + path := filepath.Join(config.Path, database.Name) + + for name, table := range database.Tables { + if !utils.IsEmptyOrEqual(config.Table, name) { + continue + } + + table.Path = filepath.Join(path, table.Name) + utils.SetLogFile(table.Path, "import") + ImportTable(table, cache, pool, config) + } +} diff --git a/migrations/kvalobs/import/main.go b/migrations/kvalobs/import/main.go new file mode 100644 index 00000000..8b0ea3c3 --- /dev/null +++ b/migrations/kvalobs/import/main.go @@ -0,0 +1,58 @@ +package port + +import ( + "context" + "fmt" + "log/slog" + "os" + + "github.com/jackc/pgx/v5/pgxpool" + + kvalobs "migrate/kvalobs/db" + "migrate/kvalobs/import/cache" + "migrate/lard" + "migrate/utils" +) + +type Config struct { + kvalobs.BaseConfig + Reindex bool `help:"Drop PG indices before insertion. Might improve performance"` +} + +func (config *Config) Execute() error { + dbs := kvalobs.InitDBs() + // Only cache from histkvalobs? 
+	cache := cache.New(dbs["histkvalobs"])
+
+	pool, err := pgxpool.New(context.Background(), os.Getenv(lard.LARD_ENV_VAR))
+	if err != nil {
+		slog.Error(fmt.Sprint("Could not connect to LARD:", err))
+	}
+	defer pool.Close()
+
+	if config.Reindex {
+		utils.DropIndices(pool)
+	}
+
+	// Recreate indices even in case the main function panics
+	defer func() {
+		r := recover()
+		if config.Reindex {
+			utils.CreateIndices(pool)
+		}
+
+		if r != nil {
+			panic(r)
+		}
+	}()
+
+	for name, db := range dbs {
+		if !utils.IsEmptyOrEqual(config.Database, name) {
+			continue
+		}
+		ImportDB(db, cache, pool, config)
+
+	}
+
+	return nil
+}
diff --git a/migrations/kvalobs/main.go b/migrations/kvalobs/main.go
new file mode 100644
index 00000000..f8cbb053
--- /dev/null
+++ b/migrations/kvalobs/main.go
@@ -0,0 +1,33 @@
+package kvalobs
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/alexflint/go-arg"
+
+	"migrate/kvalobs/check"
+	"migrate/kvalobs/dump"
+	port "migrate/kvalobs/import"
+)
+
+type Cmd struct {
+	Dump   *dump.Config  `arg:"subcommand" help:"Dump tables from Kvalobs to CSV"`
+	Import *port.Config  `arg:"subcommand" help:"Import CSV file dumped from Kvalobs"`
+	Check  *check.Config `arg:"subcommand" help:"Performs various checks on kvalobs timeseries"`
+}
+
+func (c *Cmd) Execute(parser *arg.Parser) {
+	switch {
+	case c.Dump != nil:
+		c.Dump.Execute()
+	case c.Import != nil:
+		c.Import.Execute()
+	case c.Check != nil:
+		c.Check.Execute()
+	default:
+		fmt.Println("Error: passing a subcommand is required.")
+		fmt.Println()
+		parser.WriteHelpForSubcommand(os.Stdout, "kvalobs")
+	}
+}
diff --git a/migrations/lard/import.go b/migrations/lard/import.go
index 9729f95e..3617b5ea 100644
--- a/migrations/lard/import.go
+++ b/migrations/lard/import.go
@@ -55,8 +55,8 @@ func InsertFlags(ts [][]any, pool *pgxpool.Pool, logStr string) error {
 	size := len(ts)
 	count, err := pool.CopyFrom(
 		context.TODO(),
-		pgx.Identifier{"flags", "old_databases"},
-		[]string{"timeseries", "obstime", "corrected", "controlinfo", "useinfo", "cfailed"},
+		pgx.Identifier{"flags", "kvdata"},
+		[]string{"timeseries", "obstime", "original", "corrected", "controlinfo", "useinfo", "cfailed"},
 		pgx.CopyFromRows(ts),
 	)
 	if err != nil {
diff --git a/migrations/lard/main.go b/migrations/lard/main.go
index be99c2da..b3307e07 100644
--- a/migrations/lard/main.go
+++ b/migrations/lard/main.go
@@ -2,6 +2,8 @@ package lard
 
 import "time"
 
+const LARD_ENV_VAR string = "LARD_CONN_STRING"
+
 // Struct mimicking the `public.data` table
 type DataObs struct {
 	// Timeseries ID
@@ -30,12 +32,14 @@ func (o *TextObs) ToRow() []any {
 	return []any{o.Id, o.Obstime, o.Text}
 }
 
-// Struct mimicking the `flags.old_databases` table
+// Struct mimicking the `flags.kvdata` table
 type Flag struct {
 	// Timeseries ID
 	Id int32
 	// Time of observation
 	Obstime time.Time
+	// Original value before QC tests
+	Original *float32
 	// Corrected value after QC tests
 	Corrected *float32
 	// Flag encoding quality control status
@@ -43,10 +47,10 @@
 	// Flag encoding quality control status
 	Useinfo *string
 	// Number of tests that failed?
- Cfailed *int32 + Cfailed *string } func (o *Flag) ToRow() []any { // "timeseries", "obstime", "corrected","controlinfo", "useinfo", "cfailed" - return []any{o.Id, o.Obstime, o.Corrected, o.Controlinfo, o.Useinfo, o.Cfailed} + return []any{o.Id, o.Obstime, o.Original, o.Corrected, o.Controlinfo, o.Useinfo, o.Cfailed} } diff --git a/migrations/lard/timeseries.go b/migrations/lard/timeseries.go index 5629b3c4..185cc51d 100644 --- a/migrations/lard/timeseries.go +++ b/migrations/lard/timeseries.go @@ -2,7 +2,7 @@ package lard import ( "context" - "time" + "migrate/utils" "github.com/jackc/pgx/v5/pgxpool" ) @@ -10,13 +10,20 @@ import ( // Struct that mimics `labels.met` table structure type Label struct { StationID int32 - TypeID int32 ParamID int32 + TypeID int32 Sensor *int32 Level *int32 } -func GetTimeseriesID(label Label, fromtime time.Time, pool *pgxpool.Pool) (tsid int32, err error) { +func (l *Label) sensorLevelAreBothZero() bool { + if l.Sensor == nil || l.Level == nil { + return false + } + return *l.Level == 0 && *l.Sensor == 0 +} + +func GetTimeseriesID(label *Label, timespan utils.TimeSpan, pool *pgxpool.Pool) (tsid int32, err error) { // Query LARD labels table err = pool.QueryRow( context.TODO(), @@ -33,16 +40,38 @@ func GetTimeseriesID(label Label, fromtime time.Time, pool *pgxpool.Pool) (tsid return tsid, nil } - // Otherwise insert new timeseries + // In KDVH and Kvalobs sensor and level have default values, while in LARD they are NULL + // if Obsinn does not specify them. Therefore we need to check if sensor and level are NULL + // when they are both zero. + // FIXME(?): in some cases, level and sensor are marked with (0,0) in Obsinn, + // so there might be problems if a timeseries is not present in LARD at the time of importing + if label.sensorLevelAreBothZero() { + err := pool.QueryRow( + context.TODO(), + `SELECT timeseries FROM labels.met + WHERE station_id = $1 + AND param_id = $2 + AND type_id = $3 + AND lvl IS NULL + AND sensor IS NULL`, + label.StationID, label.ParamID, label.TypeID).Scan(&tsid) + + if err == nil { + return tsid, nil + } + } + + // If none of the above worked insert a new timeseries transaction, err := pool.Begin(context.TODO()) if err != nil { return tsid, err } + // TODO: should we set `deactivated` to true if `totime` is not NULL? err = transaction.QueryRow( context.TODO(), - `INSERT INTO public.timeseries (fromtime) VALUES ($1) RETURNING id`, - fromtime, + `INSERT INTO public.timeseries (fromtime, totime) VALUES ($1, $2) RETURNING id`, + timespan.From, timespan.To, ).Scan(&tsid) if err != nil { return tsid, err diff --git a/migrations/main.go b/migrations/main.go index 78ae62c8..84d9eaa5 100644 --- a/migrations/main.go +++ b/migrations/main.go @@ -3,41 +3,48 @@ package main import ( "fmt" "log" + "os" - "github.com/jessevdk/go-flags" + "github.com/alexflint/go-arg" "github.com/joho/godotenv" "migrate/kdvh" + "migrate/kvalobs" ) type CmdArgs struct { - KDVH kdvh.Cmd `command:"kdvh" description:"Perform KDVH migrations"` + KDVH *kdvh.Cmd `arg:"subcommand" help:"Perform KDVH migrations"` + Kvalobs *kvalobs.Cmd `arg:"subcommand" help:"Perform Kvalobs migrations"` } func main() { log.SetFlags(log.LstdFlags | log.Lshortfile) - // The following env variables are needed: + // The following env variables are required: // 1. Dump - // - kdvh: "KDVH_PROXY_CONN" + // - kdvh: "KDVH_PROXY_CONN_STRING" + // - kvalobs: "KVALOBS_CONN_STRING", "HISTKVALOBS_CONN_STRING" // // 2. 
Import - // - kdvh: "LARD_STRING", "STINFO_STRING", "KDVH_PROXY_CONN" + // - kdvh: "LARD_CONN_STRING", "STINFO_CONN_STRING", "KDVH_PROXY_CONN_STRING" + // - kvalobs: "LARD_CONN_STRING", "STINFO_CONN_STRING", "KVALOBS_CONN_STRING" err := godotenv.Load() if err != nil { fmt.Println(err) return } - // NOTE: go-flags calls the Execute method on the parsed subcommand - _, err = flags.Parse(&CmdArgs{}) - if err != nil { - if flagsErr, ok := err.(*flags.Error); ok { - if flagsErr.Type == flags.ErrHelp { - return - } - } - fmt.Println("Type './migrate -h' for help") - return + args := CmdArgs{} + parser := arg.MustParse(&args) + + switch { + case args.KDVH != nil: + args.KDVH.Execute(parser) + case args.Kvalobs != nil: + args.Kvalobs.Execute(parser) + default: + fmt.Println("Error: passing a subcommand is required.") + fmt.Println() + parser.WriteHelp(os.Stdout) } } diff --git a/migrations/stinfosys/elem_map.go b/migrations/stinfosys/elem_map.go new file mode 100644 index 00000000..0dd746fc --- /dev/null +++ b/migrations/stinfosys/elem_map.go @@ -0,0 +1,73 @@ +package stinfosys + +import ( + "context" + "log/slog" + "os" + "time" + + "github.com/jackc/pgx/v5" +) + +// Map of metadata used to query timeseries ID in LARD +type ElemMap = map[Key]Param + +// Key is used for lookup of parameter offsets and metadata from Stinfosys +type Key struct { + ElemCode string + TableName string +} + +// Subset of elem_map_cfnames_param query with only param info +type Param struct { + TypeID int32 + ParamID int32 + Hlevel *int32 + Sensor int32 + Fromtime time.Time + IsScalar bool +} + +// Save metadata for later use by quering Stinfosys +func CacheElemMap(conn *pgx.Conn) ElemMap { + cache := make(ElemMap) + + rows, err := conn.Query( + context.TODO(), + `SELECT elem_code, table_name, typeid, paramid, hlevel, sensor, fromtime, scalar + FROM elem_map_cfnames_param + JOIN param USING(paramid)`, + ) + if err != nil { + slog.Error(err.Error()) + os.Exit(1) + } + + for rows.Next() { + var key Key + var param Param + err := rows.Scan( + &key.ElemCode, + &key.TableName, + ¶m.TypeID, + ¶m.ParamID, + ¶m.Hlevel, + ¶m.Sensor, + ¶m.Fromtime, + ¶m.IsScalar, + ) + if err != nil { + slog.Error(err.Error()) + os.Exit(1) + } + + cache[key] = param + } + + if rows.Err() != nil { + slog.Error(rows.Err().Error()) + os.Exit(1) + } + + return cache +} diff --git a/migrations/stinfosys/main.go b/migrations/stinfosys/main.go new file mode 100644 index 00000000..7cf002a7 --- /dev/null +++ b/migrations/stinfosys/main.go @@ -0,0 +1,23 @@ +package stinfosys + +import ( + "context" + "log" + "os" + "time" + + "github.com/jackc/pgx/v5" +) + +const STINFOSYS_ENV_VAR string = "STINFO_CONN_STRING" + +func Connect() (*pgx.Conn, context.Context) { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + conn, err := pgx.Connect(ctx, os.Getenv(STINFOSYS_ENV_VAR)) + if err != nil { + log.Fatal("Could not connect to Stinfosys. Make sure to be connected to the VPN. 
" + err.Error()) + } + return conn, ctx +} diff --git a/migrations/stinfosys/non_scalars.go b/migrations/stinfosys/non_scalars.go new file mode 100644 index 00000000..3439311e --- /dev/null +++ b/migrations/stinfosys/non_scalars.go @@ -0,0 +1,20 @@ +package stinfosys + +import ( + "context" + "log" + + "github.com/jackc/pgx/v5" +) + +func GetNonScalars(conn *pgx.Conn) []int32 { + rows, err := conn.Query(context.TODO(), "SELECT paramid FROM param WHERE scalar = false ORDER BY paramid") + if err != nil { + log.Fatal(err) + } + nonscalars, err := pgx.CollectRows(rows, pgx.RowTo[int32]) + if err != nil { + log.Fatal(err) + } + return nonscalars +} diff --git a/migrations/kdvh/import/cache/permissions.go b/migrations/stinfosys/permissions.go similarity index 77% rename from migrations/kdvh/import/cache/permissions.go rename to migrations/stinfosys/permissions.go index a820226c..ad2ed874 100644 --- a/migrations/kdvh/import/cache/permissions.go +++ b/migrations/stinfosys/permissions.go @@ -1,4 +1,4 @@ -package cache +package stinfosys import ( "context" @@ -8,6 +8,8 @@ import ( "github.com/jackc/pgx/v5" ) +const STINFO_ENV_VAR string = "STINFO_CONN_STRING" + type StationId = int32 type PermitId = int32 @@ -20,6 +22,18 @@ type ParamPermit struct { PermitId int32 } +type PermitMaps struct { + ParamPermits ParamPermitMap + StationPermits StationPermitMap +} + +func NewPermitTables(conn *pgx.Conn) PermitMaps { + return PermitMaps{ + ParamPermits: cacheParamPermits(conn), + StationPermits: cacheStationPermits(conn), + } +} + func cacheParamPermits(conn *pgx.Conn) ParamPermitMap { cache := make(ParamPermitMap) @@ -84,9 +98,9 @@ func cacheStationPermits(conn *pgx.Conn) StationPermitMap { return cache } -func (c *Cache) timeseriesIsOpen(stnr, typeid, paramid int32) bool { +func (permits *PermitMaps) TimeseriesIsOpen(stnr, typeid, paramid int32) bool { // First check param permit table - if permits, ok := c.ParamPermits[stnr]; ok { + if permits, ok := permits.ParamPermits[stnr]; ok { for _, permit := range permits { if (permit.TypeId == 0 || permit.TypeId == typeid) && (permit.ParamdId == 0 || permit.ParamdId == paramid) { @@ -96,7 +110,7 @@ func (c *Cache) timeseriesIsOpen(stnr, typeid, paramid int32) bool { } // Otherwise check station permit table - if permit, ok := c.StationPermits[stnr]; ok { + if permit, ok := permits.StationPermits[stnr]; ok { return permit == 1 } diff --git a/migrations/stinfosys/timeseries.go b/migrations/stinfosys/timeseries.go new file mode 100644 index 00000000..9de4936b --- /dev/null +++ b/migrations/stinfosys/timeseries.go @@ -0,0 +1,45 @@ +package stinfosys + +import ( + "context" + "log" + kvalobs "migrate/kvalobs/db" + "migrate/utils" + + "github.com/jackc/pgx/v5" +) + +type TimespanMap = map[kvalobs.Label]utils.TimeSpan + +func getTimeseries(conn *pgx.Conn) TimespanMap { + cache := make(TimespanMap) + + rows, err := conn.Query(context.TODO(), + `SELECT stationid, message_formatid, paramid, sensor, level, fromtime, totime + FROM time_series`) + if err != nil { + log.Fatal(err) + } + + for rows.Next() { + var label kvalobs.Label + var timespan utils.TimeSpan + + err := rows.Scan( + &label.StationID, + &label.TypeID, + &label.ParamID, + &label.Sensor, + &label.Level, + ×pan.From, + ×pan.To, + ) + if err != nil { + log.Fatal(err) + } + + cache[label] = timespan + } + + return cache +} diff --git a/migrations/kdvh/tests/T_MDATA_combined/12345/TA.csv b/migrations/tests/files/T_MDATA_combined/12345/TA.csv similarity index 100% rename from 
migrations/kdvh/tests/T_MDATA_combined/12345/TA.csv rename to migrations/tests/files/T_MDATA_combined/12345/TA.csv diff --git a/migrations/tests/files/histkvalobs/data/18700/18700_313_509_0_0.csv b/migrations/tests/files/histkvalobs/data/18700/18700_313_509_0_0.csv new file mode 100644 index 00000000..e74eebd9 --- /dev/null +++ b/migrations/tests/files/histkvalobs/data/18700/18700_313_509_0_0.csv @@ -0,0 +1,41 @@ +39 +Obstime,Original,Tbtime,Corrected,Controlinfo,Useinfo,Cfailed +2024-01-04T07:00:00Z,1171,2024-01-04T07:12:11.191105Z,1171,0100000000000000,7000000000000000, +2024-01-08T16:30:00Z,9528,2024-01-08T16:42:13.921859Z,9528,0100000000000000,7000000000000000, +2024-01-08T16:50:00Z,9376,2024-01-08T17:02:19.556872Z,9376,0100000000000000,7000000000000000, +2024-01-08T17:00:00Z,9301,2024-01-08T17:12:16.843635Z,9301,0100000000000000,7000000000000000, +2024-01-08T20:40:00Z,10889,2024-01-08T20:52:15.05597Z,10889,0100000000000000,7000000000000000, +2024-01-08T21:50:00Z,7367,2024-01-08T22:02:26.760424Z,7367,0100000000000000,7000000000000000, +2024-01-08T23:20:00Z,7908,2024-01-08T23:32:15.434304Z,7908,0100000000000000,7000000000000000, +2024-01-10T22:30:00Z,6543,2024-01-10T22:42:15.063955Z,6543,0100000000000000,7000000000000000, +2024-01-11T00:40:00Z,6010,2024-01-11T00:52:22.932791Z,6010,0100000000000000,7000000000000000, +2024-01-11T05:10:00Z,5268,2024-01-11T05:22:19.912004Z,5268,0100000000000000,7000000000000000, +2024-01-11T05:20:00Z,4919,2024-01-11T05:32:26.979669Z,4919,0100000000000000,7000000000000000, +2024-01-11T06:20:00Z,4835,2024-01-11T06:32:10.06531Z,4835,0100000000000000,7000000000000000, +2024-01-11T06:30:00Z,4835,2024-01-11T06:42:07.940127Z,4835,0100000000000000,7000000000000000, +2024-01-11T07:40:00Z,5149,2024-01-11T07:52:14.686664Z,5149,0100000000000000,7000000000000000, +2024-01-11T07:50:00Z,5801,2024-01-11T08:02:15.732653Z,5801,0100000000000000,7000000000000000, +2024-01-11T09:20:00Z,10185,2024-01-11T09:32:13.684992Z,10185,0100000000000000,7000000000000000, +2024-01-12T05:20:00Z,10150,2024-01-12T05:32:16.820627Z,10150,0100000000000000,7000000000000000, +2024-01-13T16:30:00Z,4715,2024-01-13T16:42:24.567189Z,4715,0100000000000000,7000000000000000, +2024-01-13T17:40:00Z,4265,2024-01-13T17:52:11.305075Z,4265,0100000000000000,7000000000000000, +2024-01-13T21:20:00Z,5629,2024-01-13T21:32:03.973429Z,5629,0100000000000000,7000000000000000, +2024-01-14T05:10:00Z,10235,2024-01-14T05:22:03.476192Z,10235,0100000000000000,7000000000000000, +2024-01-14T10:30:00Z,5854,2024-01-14T10:42:15.27465Z,5854,0100000000000000,7000000000000000, +2024-01-17T15:30:00Z,2977,2024-01-17T15:42:15.825088Z,2977,0100000000000000,7000000000000000, +2024-01-17T17:30:00Z,6034,2024-01-17T17:42:18.612787Z,6034,0100000000000000,7000000000000000, +2024-01-17T18:30:00Z,4655,2024-01-17T18:42:08.11493Z,4655,0100000000000000,7000000000000000, +2024-01-17T19:50:00Z,5360,2024-01-17T20:02:16.423065Z,5360,0100000000000000,7000000000000000, +2024-01-23T07:40:00Z,7038,2024-01-23T07:52:11.7033Z,7038,0100000000000000,7000000000000000, +2024-01-23T07:50:00Z,7023,2024-01-23T08:02:31.336199Z,7023,0100000000000000,7000000000000000, +2024-01-24T21:10:00Z,2932,2024-01-24T21:22:23.562398Z,2932,0100000000000000,7000000000000000, +2024-01-24T23:40:00Z,7247,2024-01-24T23:52:20.630463Z,7247,0100000000000000,7000000000000000, +2024-01-25T21:50:00Z,7787,2024-01-25T22:02:25.463753Z,7787,0100000000000000,7000000000000000, +2024-01-26T01:10:00Z,4310,2024-01-26T01:22:21.581437Z,4310,0100000000000000,7000000000000000, 
+2024-01-28T03:40:00Z,7203,2024-01-28T03:52:21.966694Z,7203,0100000000000000,7000000000000000, +2024-01-28T05:00:00Z,7427,2024-01-28T05:12:12.188434Z,7427,0100000000000000,7000000000000000, +2024-01-28T06:10:00Z,7188,2024-01-28T06:22:13.529801Z,7188,0100000000000000,7000000000000000, +2024-01-29T23:00:00Z,6798,2024-01-29T23:12:22.423795Z,6798,0100000000000000,7000000000000000, +2024-01-30T17:20:00Z,2408,2024-01-30T17:32:10.520612Z,2408,0100000000000000,7000000000000000, +2024-01-30T17:40:00Z,6124,2024-01-30T17:52:16.466449Z,6124,0100000000000000,7000000000000000, +2024-01-31T12:00:00Z,5030,2024-01-31T12:12:20.905963Z,5030,0100000000000000,7000000000000000, diff --git a/migrations/tests/files/kvalobs/text_data/18700/18700_1000_316__.csv b/migrations/tests/files/kvalobs/text_data/18700/18700_1000_316__.csv new file mode 100644 index 00000000..5b28d347 --- /dev/null +++ b/migrations/tests/files/kvalobs/text_data/18700/18700_1000_316__.csv @@ -0,0 +1,184 @@ +182 +Obstime,Original,Tbtime +2024-01-01T06:00:00Z,va,2024-01-01T06:08:24.240635Z +2024-01-01T09:00:00Z,va,2024-01-01T09:10:50.1473Z +2024-01-01T12:00:00Z,va,2024-01-01T12:08:01.342058Z +2024-01-01T15:00:00Z,va,2024-01-01T15:05:18.95104Z +2024-01-01T18:00:00Z,va,2024-01-01T18:30:11.257833Z +2024-01-01T21:00:00Z,va,2024-01-01T20:55:31.867204Z +2024-01-02T06:00:00Z,SC,2024-01-02T06:04:05.169123Z +2024-01-02T09:00:00Z,SC,2024-01-02T08:47:34.474338Z +2024-01-02T12:00:00Z,SC,2024-01-02T12:13:07.648614Z +2024-01-02T15:00:00Z,va,2024-01-02T15:02:44.39202Z +2024-01-02T18:00:00Z,va,2024-01-02T18:02:59.425499Z +2024-01-02T21:00:00Z,va,2024-01-02T20:58:54.74345Z +2024-01-03T06:00:00Z,va,2024-01-03T06:20:35.275366Z +2024-01-03T09:00:00Z,va,2024-01-03T08:52:25.111242Z +2024-01-03T12:00:00Z,va,2024-01-03T11:51:31.620272Z +2024-01-03T15:00:00Z,va,2024-01-03T14:57:27.552375Z +2024-01-03T18:00:00Z,va,2024-01-03T17:32:50.639057Z +2024-01-03T21:00:00Z,va,2024-01-03T20:22:47.873367Z +2024-01-04T06:00:00Z,va,2024-01-04T06:02:14.54783Z +2024-01-04T09:00:00Z,va,2024-01-04T09:38:35.151297Z +2024-01-04T12:00:00Z,va,2024-01-04T11:58:39.210352Z +2024-01-04T15:00:00Z,va,2024-01-04T14:36:28.976216Z +2024-01-04T18:00:00Z,va,2024-01-04T17:41:29.45406Z +2024-01-04T21:00:00Z,va,2024-01-04T20:35:33.725461Z +2024-01-05T06:00:00Z,va,2024-01-05T06:14:54.41311Z +2024-01-05T09:00:00Z,va,2024-01-05T08:44:30.531347Z +2024-01-05T12:00:00Z,va,2024-01-05T11:51:51.93322Z +2024-01-05T15:00:00Z,va,2024-01-05T15:22:36.008918Z +2024-01-05T18:00:00Z,va,2024-01-05T17:39:49.191184Z +2024-01-05T21:00:00Z,va,2024-01-05T20:45:33.672168Z +2024-01-06T06:00:00Z,SC,2024-01-06T06:02:26.889775Z +2024-01-06T09:00:00Z,SC,2024-01-06T08:52:37.760554Z +2024-01-06T12:00:00Z,SC,2024-01-06T12:04:39.104056Z +2024-01-06T15:00:00Z,va,2024-01-06T14:51:41.02775Z +2024-01-06T18:00:00Z,va,2024-01-06T17:55:10.729851Z +2024-01-06T21:00:00Z,va,2024-01-06T20:45:06.338881Z +2024-01-07T06:00:00Z,SC,2024-01-07T06:02:23.831664Z +2024-01-07T09:00:00Z,SC,2024-01-07T09:00:16.907367Z +2024-01-07T12:00:00Z,SC,2024-01-07T12:00:36.88099Z +2024-01-07T15:00:00Z,va,2024-01-07T14:59:34.897702Z +2024-01-07T18:00:00Z,va,2024-01-07T18:11:41.549957Z +2024-01-07T21:00:00Z,va,2024-01-07T20:41:58.506384Z +2024-01-08T06:00:00Z,va,2024-01-08T06:01:01.372157Z +2024-01-08T09:00:00Z,va,2024-01-08T09:12:12.454335Z +2024-01-08T12:00:00Z,va,2024-01-08T12:15:47.63165Z +2024-01-08T15:00:00Z,va,2024-01-08T15:18:49.539079Z +2024-01-08T18:00:00Z,va,2024-01-08T19:42:54.296054Z +2024-01-08T21:00:00Z,va,2024-01-08T21:01:46.002814Z 
+2024-01-09T06:00:00Z,va,2024-01-09T05:58:14.305347Z +2024-01-09T09:00:00Z,va,2024-01-09T11:57:37.327976Z +2024-01-09T12:00:00Z,va,2024-01-09T11:59:47.53963Z +2024-01-09T15:00:00Z,SC,2024-01-09T14:50:23.998702Z +2024-01-09T18:00:00Z,SC,2024-01-09T17:51:45.133164Z +2024-01-09T21:00:00Z,SC,2024-01-09T20:53:42.77469Z +2024-01-10T06:00:00Z,va,2024-01-10T06:05:26.787634Z +2024-01-10T09:00:00Z,va,2024-01-10T09:08:11.442462Z +2024-01-10T12:00:00Z,va,2024-01-10T11:44:16.870721Z +2024-01-10T15:00:00Z,va,2024-01-11T08:41:11.583158Z +2024-01-10T18:00:00Z,va,2024-01-10T17:35:18.484989Z +2024-01-10T21:00:00Z,va,2024-01-10T20:41:50.248167Z +2024-01-11T06:00:00Z,SC,2024-01-11T06:04:20.161775Z +2024-01-11T09:00:00Z,SC,2024-01-11T08:59:27.063146Z +2024-01-11T12:00:00Z,SC,2024-01-11T11:49:34.889925Z +2024-01-11T15:00:00Z,JB,2024-01-11T14:36:50.081976Z +2024-01-11T18:00:00Z,JB,2024-01-11T17:43:17.266704Z +2024-01-11T21:00:00Z,JB,2024-01-11T20:40:41.69956Z +2024-01-12T06:00:00Z,va,2024-01-12T06:11:42.147972Z +2024-01-12T09:00:00Z,va,2024-01-12T09:26:38.134323Z +2024-01-12T12:00:00Z,va,2024-01-12T11:54:34.64495Z +2024-01-12T15:00:00Z,va,2024-01-12T14:31:53.355821Z +2024-01-12T18:00:00Z,va,2024-01-12T17:38:16.986212Z +2024-01-12T21:00:00Z,va,2024-01-12T20:39:25.87586Z +2024-01-13T06:00:00Z,va,2024-01-13T05:58:53.380086Z +2024-01-13T09:00:00Z,va,2024-01-13T08:58:14.543081Z +2024-01-13T12:00:00Z,va,2024-01-13T12:05:19.654932Z +2024-01-13T15:00:00Z,va,2024-01-13T14:51:45.819942Z +2024-01-13T18:00:00Z,va,2024-01-13T17:57:06.844951Z +2024-01-13T21:00:00Z,va,2024-01-13T20:48:20.136926Z +2024-01-14T06:00:00Z,va,2024-01-14T05:56:41.684562Z +2024-01-14T09:00:00Z,va,2024-01-14T08:54:13.553349Z +2024-01-14T12:00:00Z,va,2024-01-14T12:06:54.114354Z +2024-01-14T15:00:00Z,va,2024-01-14T14:51:24.397173Z +2024-01-14T18:00:00Z,va,2024-01-14T17:44:34.449537Z +2024-01-14T21:00:00Z,va,2024-01-14T21:07:00.418857Z +2024-01-15T06:00:00Z,va,2024-01-15T06:21:56.238594Z +2024-01-15T09:00:00Z,va,2024-01-15T09:21:49.666277Z +2024-01-15T12:00:00Z,va,2024-01-15T11:49:46.404973Z +2024-01-15T15:00:00Z,va,2024-01-15T15:03:35.548944Z +2024-01-15T18:00:00Z,va,2024-01-15T18:13:12.872413Z +2024-01-15T21:00:00Z,va,2024-01-15T21:04:01.093129Z +2024-01-16T06:00:00Z,SC,2024-01-16T06:02:29.196795Z +2024-01-16T09:00:00Z,SC,2024-01-16T08:59:58.937917Z +2024-01-16T12:00:00Z,SC,2024-01-16T11:53:13.700814Z +2024-01-16T15:00:00Z,va,2024-01-16T14:58:22.239067Z +2024-01-16T18:00:00Z,va,2024-01-16T18:13:01.354125Z +2024-01-16T21:00:00Z,va,2024-01-16T20:53:10.687428Z +2024-01-17T06:00:00Z,SC,2024-01-17T05:50:10.510894Z +2024-01-17T09:00:00Z,SC,2024-01-17T08:52:35.553462Z +2024-01-17T12:00:00Z,SC,2024-01-17T11:44:37.594396Z +2024-01-17T15:00:00Z,va,2024-01-17T14:54:25.938316Z +2024-01-17T18:00:00Z,va,2024-01-17T17:51:49.384976Z +2024-01-17T21:00:00Z,va,2024-01-17T20:37:27.05038Z +2024-01-18T06:00:00Z,va,2024-01-18T06:00:11.917462Z +2024-01-18T09:00:00Z,va,2024-01-18T09:10:18.629467Z +2024-01-18T12:00:00Z,va,2024-01-18T12:07:40.456704Z +2024-01-18T15:00:00Z,va,2024-01-18T14:46:35.571809Z +2024-01-18T18:00:00Z,va,2024-01-18T17:52:16.032836Z +2024-01-19T06:00:00Z,SC,2024-01-19T06:14:46.595777Z +2024-01-19T09:00:00Z,SC,2024-01-19T09:06:55.85563Z +2024-01-19T12:00:00Z,SC,2024-01-19T11:56:59.340324Z +2024-01-19T15:00:00Z,bw,2024-01-19T14:51:47.71753Z +2024-01-19T18:00:00Z,va,2024-01-19T17:39:43.99902Z +2024-01-19T21:00:00Z,va,2024-01-19T20:44:17.031778Z +2024-01-20T06:00:00Z,BW,2024-01-20T05:26:01.293989Z +2024-01-20T09:00:00Z,bw,2024-01-20T08:47:24.672509Z 
+2024-01-20T12:00:00Z,BW,2024-01-20T11:47:46.011987Z +2024-01-20T15:00:00Z,va,2024-01-20T14:25:38.98911Z +2024-01-20T18:00:00Z,va,2024-01-20T18:58:19.481547Z +2024-01-20T21:00:00Z,va,2024-01-20T20:59:04.386123Z +2024-01-21T06:00:00Z,va,2024-01-21T09:17:45.251403Z +2024-01-21T09:00:00Z,va,2024-01-21T09:20:25.583798Z +2024-01-21T12:00:00Z,va,2024-01-21T12:04:34.520547Z +2024-01-21T15:00:00Z,va,2024-01-21T14:57:45.720579Z +2024-01-21T18:00:00Z,va,2024-01-21T17:53:16.434501Z +2024-01-21T21:00:00Z,va,2024-01-21T20:55:08.020237Z +2024-01-22T06:00:00Z,va,2024-01-22T06:49:17.410717Z +2024-01-22T09:00:00Z,va,2024-01-22T08:43:53.580873Z +2024-01-22T12:00:00Z,va,2024-01-22T12:02:25.689051Z +2024-01-22T15:00:00Z,SC,2024-01-22T14:57:12.17922Z +2024-01-22T18:00:00Z,SC,2024-01-22T18:02:32.378874Z +2024-01-22T21:00:00Z,SC,2024-01-22T20:47:33.700204Z +2024-01-23T06:00:00Z,va,2024-01-23T06:06:40.003847Z +2024-01-23T09:00:00Z,va,2024-01-23T09:44:54.914519Z +2024-01-23T12:00:00Z,va,2024-01-23T11:26:22.815102Z +2024-01-23T15:00:00Z,jih,2024-01-23T14:49:07.301263Z +2024-01-23T18:00:00Z,jih,2024-01-23T20:52:21.247563Z +2024-01-23T21:00:00Z,jih,2024-01-23T20:51:45.070332Z +2024-01-24T06:00:00Z,va,2024-01-24T06:21:48.032913Z +2024-01-24T09:00:00Z,va,2024-01-24T08:56:18.676311Z +2024-01-24T12:00:00Z,va,2024-01-24T11:59:30.47087Z +2024-01-24T15:00:00Z,mg,2024-01-24T15:32:33.55375Z +2024-01-24T18:00:00Z,mg,2024-01-24T17:56:06.438737Z +2024-01-24T21:00:00Z,mg,2024-01-24T20:42:05.766951Z +2024-01-25T06:00:00Z,SC,2024-01-25T06:01:42.014859Z +2024-01-25T09:00:00Z,SC,2024-01-25T08:52:18.318853Z +2024-01-25T12:00:00Z,SC,2024-01-25T12:02:59.837605Z +2024-01-25T15:00:00Z,VA,2024-01-25T14:49:11.476807Z +2024-01-26T06:00:00Z,va,2024-01-26T05:55:15.039851Z +2024-01-26T09:00:00Z,va,2024-01-26T09:04:00.737621Z +2024-01-26T12:00:00Z,va,2024-01-26T11:49:17.932132Z +2024-01-26T15:00:00Z,va,2024-01-26T14:53:41.582922Z +2024-01-26T18:00:00Z,va,2024-01-26T17:39:36.184963Z +2024-01-26T21:00:00Z,va,2024-01-26T20:40:10.970383Z +2024-01-27T06:00:00Z,SC,2024-01-27T06:02:23.021561Z +2024-01-27T09:00:00Z,SC,2024-01-27T08:58:11.570132Z +2024-01-27T12:00:00Z,SC,2024-01-27T11:42:35.473277Z +2024-01-27T15:00:00Z,mg,2024-01-27T15:00:17.328911Z +2024-01-27T18:00:00Z,mg,2024-01-27T17:58:20.247898Z +2024-01-27T21:00:00Z,mg,2024-01-27T20:43:19.447566Z +2024-01-28T06:00:00Z,SC,2024-01-28T06:02:23.33471Z +2024-01-28T09:00:00Z,SC,2024-01-28T08:54:38.591659Z +2024-01-28T12:00:00Z,SC,2024-01-28T11:58:44.609364Z +2024-01-28T15:00:00Z,mg,2024-01-28T15:34:37.199962Z +2024-01-28T18:00:00Z,mg,2024-01-28T18:01:27.087291Z +2024-01-28T21:00:00Z,mg,2024-01-28T20:49:38.070406Z +2024-01-29T06:00:00Z,va,2024-01-29T06:17:51.260721Z +2024-01-29T09:00:00Z,va,2024-01-29T09:17:11.929249Z +2024-01-29T12:00:00Z,va,2024-01-29T13:00:13.966988Z +2024-01-29T15:00:00Z,SC,2024-01-29T14:55:35.018886Z +2024-01-29T18:00:00Z,SC,2024-01-29T18:01:34.059239Z +2024-01-29T21:00:00Z,SC,2024-01-29T20:52:07.216667Z +2024-01-30T06:00:00Z,va,2024-01-30T05:54:41.193538Z +2024-01-30T09:00:00Z,va,2024-01-30T09:02:14.035271Z +2024-01-30T12:00:00Z,va,2024-01-30T11:48:50.918808Z +2024-01-30T15:00:00Z,va,2024-01-30T14:59:36.664281Z +2024-01-30T18:00:00Z,va,2024-01-30T18:16:24.86856Z +2024-01-30T21:00:00Z,va,2024-01-30T20:51:28.917896Z +2024-01-31T06:00:00Z,SC,2024-01-31T06:03:00.208048Z +2024-01-31T09:00:00Z,SC,2024-01-31T09:10:06.967889Z +2024-01-31T12:00:00Z,SC,2024-01-31T11:58:16.524756Z +2024-01-31T18:00:00Z,mg,2024-01-31T18:17:04.641392Z +2024-01-31T21:00:00Z,mg,2024-01-31T20:49:57.68738Z diff 
--git a/migrations/kdvh/kdvh_test.go b/migrations/tests/kdvh_test.go similarity index 77% rename from migrations/kdvh/kdvh_test.go rename to migrations/tests/kdvh_test.go index 196f12c5..98366021 100644 --- a/migrations/kdvh/kdvh_test.go +++ b/migrations/tests/kdvh_test.go @@ -1,4 +1,4 @@ -package kdvh +package tests import ( "context" @@ -12,11 +12,10 @@ import ( "migrate/kdvh/db" port "migrate/kdvh/import" "migrate/kdvh/import/cache" + "migrate/stinfosys" ) -const LARD_STRING string = "host=localhost user=postgres dbname=postgres password=postgres" - -type ImportTest struct { +type KdvhTestCase struct { table string station int32 elem string @@ -24,24 +23,26 @@ type ImportTest struct { expectedRows int64 } -func (t *ImportTest) mockConfig() (*port.Config, *cache.Cache) { +func (t *KdvhTestCase) mockConfig() (*port.Config, *cache.Cache) { return &port.Config{ Tables: []string{t.table}, Stations: []string{fmt.Sprint(t.station)}, Elements: []string{t.elem}, - BaseDir: "./tests", + Path: "./files", HasHeader: true, Sep: ";", }, &cache.Cache{ - Stinfo: cache.StinfoMap{ + Elements: stinfosys.ElemMap{ {ElemCode: t.elem, TableName: t.table}: { Fromtime: time.Date(2001, 7, 1, 9, 0, 0, 0, time.UTC), IsScalar: true, }, }, - StationPermits: cache.StationPermitMap{ - t.station: t.permit, + Permits: stinfosys.PermitMaps{ + StationPermits: stinfosys.StationPermitMap{ + t.station: t.permit, + }, }, } } @@ -55,14 +56,14 @@ func TestImportKDVH(t *testing.T) { } defer pool.Close() - testCases := []ImportTest{ + testCases := []KdvhTestCase{ {table: "T_MDATA", station: 12345, elem: "TA", permit: 0, expectedRows: 0}, // restricted TS {table: "T_MDATA", station: 12345, elem: "TA", permit: 1, expectedRows: 2644}, // open TS } kdvh := db.Init() - // TODO: test does not fail, if flags are not inserted + // TODO: test does not fail if flags are not inserted // TODO: bar does not work well with log print outs for _, c := range testCases { config, cache := c.mockConfig() diff --git a/migrations/tests/kvalobs_test.go b/migrations/tests/kvalobs_test.go new file mode 100644 index 00000000..67911859 --- /dev/null +++ b/migrations/tests/kvalobs_test.go @@ -0,0 +1,98 @@ +package tests + +import ( + "context" + "log" + "path/filepath" + "testing" + "time" + + "github.com/jackc/pgx/v5/pgxpool" + + kvalobs "migrate/kvalobs/db" + port "migrate/kvalobs/import" + "migrate/kvalobs/import/cache" + "migrate/stinfosys" + "migrate/utils" +) + +const LARD_STRING string = "host=localhost user=postgres dbname=postgres password=postgres" +const DUMPS_PATH string = "./files" + +type KvalobsTestCase struct { + db string + table string + station int32 + paramid int32 + typeid int32 + sensor *int32 + level *int32 + permit int32 + expectedRows int64 +} + +func (t *KvalobsTestCase) mockConfig() (*port.Config, *cache.Cache) { + fromtime, _ := time.Parse(time.DateOnly, "1900-01-01") + return &port.Config{ + BaseConfig: kvalobs.BaseConfig{ + Stations: []int32{t.station}, + }, + }, + &cache.Cache{ + Meta: map[cache.MetaKey]utils.TimeSpan{ + {Stationid: t.station}: {From: &fromtime}, + }, + Permits: stinfosys.PermitMaps{ + StationPermits: stinfosys.StationPermitMap{ + t.station: t.permit, + }, + }, + } +} + +func TestImportDataKvalobs(t *testing.T) { + log.SetFlags(log.LstdFlags | log.Lshortfile) + + pool, err := pgxpool.New(context.TODO(), LARD_STRING) + if err != nil { + t.Log("Could not connect to Lard:", err) + } + defer pool.Close() + + dbs := kvalobs.InitDBs() + + cases := []KvalobsTestCase{ + { + db: "histkvalobs", + table: "data", + 
+			station: 18700,
+			paramid: 313,
+			permit: 1,
+			expectedRows: 39,
+		},
+		{
+			db: "kvalobs",
+			table: "text_data",
+			station: 18700,
+			permit: 1,
+			expectedRows: 182,
+		},
+	}
+
+	for _, c := range cases {
+		config, cache := c.mockConfig()
+		db := dbs[c.db]
+
+		table := db.Tables[c.table]
+		table.Path = filepath.Join(DUMPS_PATH, db.Name, table.Name)
+
+		insertedRows, err := port.ImportTable(table, cache, pool, config)
+
+		switch {
+		case err != nil:
+			t.Fatal(err)
+		case insertedRows != c.expectedRows:
+			t.Fail()
+		}
+	}
+}
diff --git a/migrations/utils/indices.go b/migrations/utils/indices.go
new file mode 100644
index 00000000..b75ee276
--- /dev/null
+++ b/migrations/utils/indices.go
@@ -0,0 +1,40 @@
+package utils
+
+import (
+	"context"
+	"log/slog"
+	"os"
+
+	"github.com/jackc/pgx/v5/pgxpool"
+)
+
+func DropIndices(pool *pgxpool.Pool) {
+	slog.Info("Dropping table indices...")
+
+	file, err := os.ReadFile("../db/drop_indices.sql")
+	if err != nil {
+		panic(err.Error())
+	}
+
+	_, err = pool.Exec(context.Background(), string(file))
+	if err != nil {
+		panic(err.Error())
+	}
+}
+
+func CreateIndices(pool *pgxpool.Pool) {
+	slog.Info("Recreating table indices...")
+
+	files := []string{"../db/public.sql", "../db/flags.sql"}
+	for _, filename := range files {
+		file, err := os.ReadFile(filename)
+		if err != nil {
+			panic(err.Error())
+		}
+
+		_, err = pool.Exec(context.Background(), string(file))
+		if err != nil {
+			panic(err.Error())
+		}
+	}
+}
diff --git a/migrations/utils/time.go b/migrations/utils/time.go
new file mode 100644
index 00000000..caa674d1
--- /dev/null
+++ b/migrations/utils/time.go
@@ -0,0 +1,48 @@
+package utils
+
+import (
+	"fmt"
+	"time"
+)
+
+type Timestamp struct {
+	t time.Time
+}
+
+func (ts *Timestamp) UnmarshalText(b []byte) error {
+	t, err := time.Parse(time.DateOnly, string(b))
+	if err != nil {
+		return fmt.Errorf("Only the date-only format (\"YYYY-MM-DD\") is allowed. Got %s", b)
+	}
+	ts.t = t
+	return nil
+}
+
+// func (ts *Timestamp) Format(layout string) string {
+// 	return ts.t.Format(layout)
+// }
+
+func (ts *Timestamp) Inner() *time.Time {
+	if ts == nil {
+		return nil
+	}
+
+	return &ts.t
+}
+
+type TimeSpan struct {
+	From *time.Time
+	To   *time.Time
+}
+
+func (t *TimeSpan) ToString() string {
+	from := "from"
+	to := "to"
+	if t.From != nil {
+		from += t.From.Format(time.DateOnly)
+	}
+	if t.To != nil {
+		to += t.To.Format(time.DateOnly)
+	}
+	return from + "_" + to
+}
diff --git a/migrations/utils/utils.go b/migrations/utils/utils.go
index 31974362..4920499a 100644
--- a/migrations/utils/utils.go
+++ b/migrations/utils/utils.go
@@ -6,7 +6,9 @@ import (
 	"log/slog"
 	"os"
 	"slices"
+	"strconv"
 	"strings"
+	"time"
 
 	"github.com/schollz/progressbar/v3"
 )
@@ -29,6 +31,10 @@ func NewBar(size int, description string) *progressbar.ProgressBar {
 	)
 }
 
+func IsEmptyOrEqual(first, second string) bool {
+	return first == "" || first == second
+}
+
 // Filters elements of a slice by comparing them to the elements of a reference slice.
 // formatMsg is an optional format string with a single format argument that can be used
 // to add context on why the element may be missing from the reference slice
@@ -38,7 +44,7 @@ func FilterSlice[T comparable](slice, reference []T, formatMsg string) []T {
 	}
 
 	if formatMsg == "" {
-		formatMsg = "Value '%s' not present in reference slice, skipping"
+		formatMsg = "Value '%v' not present in reference slice, skipping"
 	}
 
 	// I hate this so much
@@ -64,11 +70,66 @@ func SaveToFile(values []string, filename string) error {
 }
 
 func SetLogFile(table, procedure string) {
-	filename := fmt.Sprintf("%s_%s_log.txt", table, procedure)
+	filename := fmt.Sprintf("%s_%s_%s.log", table, procedure, time.Now().Format(time.RFC3339))
 	fh, err := os.Create(filename)
 	if err != nil {
-		slog.Error(fmt.Sprintf("Could not create log '%s': %s", filename, err))
+		slog.Error(fmt.Sprintf("Could not create log %q: %s", filename, err))
 		return
 	}
 	log.SetOutput(fh)
 }
+
+func ToInt32(s string) int32 {
+	res, err := strconv.ParseInt(s, 10, 32)
+	if err != nil {
+		// Panic is fine here, because we use this function only at startup
+		panic("Could not parse to int")
+	}
+	return int32(res)
+}
+
+func Map[T, V any](ts []T, fn func(T) V) []V {
+	result := make([]V, len(ts))
+	for i, t := range ts {
+		result[i] = fn(t)
+	}
+	return result
+}
+
+// Similar to Map, but bails immediately if any error occurs
+func TryMap[T, V any](ts []T, fn func(T) (V, error)) ([]V, error) {
+	result := make([]V, len(ts))
+	for i, t := range ts {
+		temp, err := fn(t)
+		if err != nil {
+			return nil, err
+		}
+		result[i] = temp
+	}
+	return result, nil
+}
+
+// Returns `true` if the slice is nil, otherwise checks if the element is
+// contained in the slice
+func IsEmptyOrContains[T comparable](s []T, v T) bool {
+	if s == nil {
+		return true
+	}
+	return slices.Contains(s, v)
+}
+
+// Returns `true` if the slice is nil,
+// `false` if the element pointer is nil,
+// otherwise checks if the element is contained in the slice
+func IsEmptyOrContainsPtr[T comparable](s []T, v *T) bool {
+	if s == nil {
+		return true
+	}
+
+	if v == nil {
+		// Nil value is definitely not contained in non-nil slice
+		return false
+	}
+
+	return slices.Contains(s, *v)
+}
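
A minimal usage sketch (not part of the patch) of the generic slice helpers added in migrations/utils/utils.go above; the station numbers and the standalone main package are illustrative only:

	package main

	import (
		"fmt"
		"strconv"

		"migrate/utils"
	)

	func main() {
		// Map applies a conversion to every element; ToInt32 panics on malformed input.
		stations := utils.Map([]string{"18700", "12345"}, utils.ToInt32)

		// TryMap is the error-returning variant and stops at the first failure.
		parsed, err := utils.TryMap([]string{"18700", "x"}, func(s string) (int64, error) {
			return strconv.ParseInt(s, 10, 32)
		})
		fmt.Println(parsed, err) // nil slice and a parse error for "x"

		// A nil filter matches everything, which is how the dump and import
		// commands interpret an unset station filter.
		var filter []int32
		fmt.Println(utils.IsEmptyOrContains(filter, stations[0])) // true

		filter = []int32{12345}
		fmt.Println(utils.IsEmptyOrContains(filter, stations[0])) // false
	}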