Skip to content

Commit 9b4e5ea

Browse files
authored
Add serialization and API changes for post_array_schema_from_rest. (#5237)
This factors out serialization and API changes in #5181 that are required for the HandleGetArraySchema route. These changes will need to be available on REST before we can enable the new route for loading the array schema. There is a quick summary of the changes required in [SC-52877](https://app.shortcut.com/tiledb-inc/story/52877/core-serialization-changes-for-loadarrayschema-models). --- TYPE: IMPROVEMENT DESC: Add serialization and API changes for post_array_schema_from_rest.
1 parent 910fffd commit 9b4e5ea

26 files changed

+633
-116
lines changed

test/src/unit-capi-config.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ void check_save_to_file() {
230230
ss << "rest.curl.buffer_size 524288\n";
231231
ss << "rest.curl.verbose false\n";
232232
ss << "rest.http_compressor any\n";
233-
ss << "rest.load_enumerations_on_array_open true\n";
233+
ss << "rest.load_enumerations_on_array_open false\n";
234234
ss << "rest.load_metadata_on_array_open true\n";
235235
ss << "rest.load_non_empty_domain_on_array_open true\n";
236236
ss << "rest.retry_count 25\n";

test/src/unit-enumerations.cc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,6 +1120,69 @@ TEST_CASE_METHOD(
11201120
REQUIRE(schema->is_enumeration_loaded("test_enmr") == true);
11211121
}
11221122

1123+
TEST_CASE_METHOD(
1124+
EnumerationFx,
1125+
"Array - Load All Enumerations - All Schemas",
1126+
"[enumeration][array][load-all-enumerations][all-schemas]") {
1127+
create_array();
1128+
auto array = get_array(QueryType::READ);
1129+
auto schema = array->array_schema_latest_ptr();
1130+
REQUIRE(schema->is_enumeration_loaded("test_enmr") == false);
1131+
std::string schema_name_1 = schema->name();
1132+
1133+
// Evolve once to add an enumeration.
1134+
auto ase = make_shared<ArraySchemaEvolution>(HERE(), memory_tracker_);
1135+
std::vector<std::string> var_values{"one", "two", "three"};
1136+
auto var_enmr = create_enumeration(
1137+
var_values, false, Datatype::STRING_ASCII, "ase_var_enmr");
1138+
ase->add_enumeration(var_enmr);
1139+
auto attr4 = make_shared<Attribute>(HERE(), "attr4", Datatype::UINT16);
1140+
attr4->set_enumeration_name("ase_var_enmr");
1141+
CHECK_NOTHROW(ase->evolve_schema(schema));
1142+
// Apply evolution to the array and reopen.
1143+
CHECK_NOTHROW(Array::evolve_array_schema(
1144+
ctx_.resources(), uri_, ase.get(), array->get_encryption_key()));
1145+
CHECK(array->reopen().ok());
1146+
CHECK_NOTHROW(array->load_all_enumerations());
1147+
auto all_schemas = array->array_schemas_all();
1148+
schema = array->array_schema_latest_ptr();
1149+
std::string schema_name_2 = schema->name();
1150+
1151+
// Check all schemas.
1152+
CHECK(all_schemas[schema_name_1]->is_enumeration_loaded("test_enmr") == true);
1153+
CHECK(all_schemas[schema_name_2]->is_enumeration_loaded("test_enmr") == true);
1154+
CHECK(
1155+
all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") ==
1156+
true);
1157+
1158+
// Evolve a second time to drop an enumeration.
1159+
ase = make_shared<ArraySchemaEvolution>(HERE(), memory_tracker_);
1160+
ase->drop_enumeration("test_enmr");
1161+
ase->drop_attribute("attr1");
1162+
CHECK_NOTHROW(ase->evolve_schema(schema));
1163+
// Apply evolution to the array and reopen.
1164+
CHECK_NOTHROW(Array::evolve_array_schema(
1165+
ctx_.resources(), uri_, ase.get(), array->get_encryption_key()));
1166+
CHECK(array->reopen().ok());
1167+
CHECK_NOTHROW(array->load_all_enumerations());
1168+
all_schemas = array->array_schemas_all();
1169+
schema = array->array_schema_latest_ptr();
1170+
std::string schema_name_3 = schema->name();
1171+
1172+
// Check all schemas.
1173+
CHECK(all_schemas[schema_name_1]->is_enumeration_loaded("test_enmr") == true);
1174+
CHECK(all_schemas[schema_name_2]->is_enumeration_loaded("test_enmr") == true);
1175+
CHECK(
1176+
all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") ==
1177+
true);
1178+
CHECK_THROWS_WITH(
1179+
all_schemas[schema_name_3]->is_enumeration_loaded("test_enmr"),
1180+
Catch::Matchers::ContainsSubstring("No enumeration named"));
1181+
CHECK(
1182+
all_schemas[schema_name_3]->is_enumeration_loaded("ase_var_enmr") ==
1183+
true);
1184+
}
1185+
11231186
TEST_CASE_METHOD(
11241187
EnumerationFx,
11251188
"Array - Load All Enumerations - Repeated",

test/src/unit-request-handlers.cc

Lines changed: 100 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
#ifdef TILEDB_SERIALIZATION
3434

35+
#include "test/support/src/helpers.h"
3536
#include "test/support/src/mem_helpers.h"
3637
#include "test/support/tdb_catch.h"
3738
#include "tiledb/api/c_api/buffer/buffer_api_internal.h"
@@ -41,6 +42,7 @@
4142
#include "tiledb/sm/c_api/tiledb_serialization.h"
4243
#include "tiledb/sm/c_api/tiledb_struct_def.h"
4344
#include "tiledb/sm/cpp_api/tiledb"
45+
#include "tiledb/sm/cpp_api/tiledb_experimental"
4446
#include "tiledb/sm/crypto/encryption_key.h"
4547
#include "tiledb/sm/enums/array_type.h"
4648
#include "tiledb/sm/enums/encryption_type.h"
@@ -67,6 +69,7 @@ struct RequestHandlerFx {
6769
Config cfg_;
6870
Context ctx_;
6971
EncryptionKey enc_key_;
72+
shared_ptr<ArraySchema> schema_;
7073
};
7174

7275
struct HandleLoadArraySchemaRequestFx : RequestHandlerFx {
@@ -75,11 +78,17 @@ struct HandleLoadArraySchemaRequestFx : RequestHandlerFx {
7578
}
7679

7780
virtual shared_ptr<ArraySchema> create_schema() override;
78-
shared_ptr<ArraySchema> call_handler(
81+
82+
std::tuple<
83+
shared_ptr<ArraySchema>,
84+
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
85+
call_handler(
7986
serialization::LoadArraySchemaRequest req, SerializationType stype);
8087

8188
shared_ptr<const Enumeration> create_string_enumeration(
8289
std::string name, std::vector<std::string>& values);
90+
91+
shared_ptr<ArraySchema> schema_add_attribute(const std::string& attr_name);
8392
};
8493

8594
struct HandleQueryPlanRequestFx : RequestHandlerFx {
@@ -116,15 +125,23 @@ struct HandleConsolidationPlanRequestFx : RequestHandlerFx {
116125

117126
TEST_CASE_METHOD(
118127
HandleLoadArraySchemaRequestFx,
119-
"tiledb_handle_load_array_schema_request - default request",
128+
"tiledb_handle_load_array_schema_request - no enumerations",
120129
"[request_handler][load_array_schema][default]") {
121130
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);
122131

123132
create_array();
124-
auto schema =
125-
call_handler(serialization::LoadArraySchemaRequest(false), stype);
133+
auto schema_response =
134+
call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
135+
auto schema = std::get<0>(schema_response);
126136
REQUIRE(schema->has_enumeration("enmr"));
127137
REQUIRE(schema->get_loaded_enumeration_names().size() == 0);
138+
tiledb::test::schema_equiv(*schema, *schema_);
139+
140+
// We did not evolve the schema so there should only be one.
141+
auto all_schemas = std::get<1>(schema_response);
142+
REQUIRE(all_schemas.size() == 1);
143+
tiledb::test::schema_equiv(
144+
*all_schemas.find(schema->name())->second, *schema_);
128145
}
129146

130147
TEST_CASE_METHOD(
@@ -134,12 +151,57 @@ TEST_CASE_METHOD(
134151
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);
135152

136153
create_array();
137-
auto schema =
138-
call_handler(serialization::LoadArraySchemaRequest(true), stype);
154+
REQUIRE(cfg_.set("rest.load_enumerations_on_array_open", "true").ok());
155+
auto schema_response =
156+
call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
157+
auto schema = std::get<0>(schema_response);
139158
REQUIRE(schema->has_enumeration("enmr"));
140159
REQUIRE(schema->get_loaded_enumeration_names().size() == 1);
141160
REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr");
142161
REQUIRE(schema->get_enumeration("enmr") != nullptr);
162+
tiledb::test::schema_equiv(*schema, *schema_);
163+
164+
// We did not evolve the schema so there should only be one.
165+
auto all_schemas = std::get<1>(schema_response);
166+
REQUIRE(all_schemas.size() == 1);
167+
tiledb::test::schema_equiv(
168+
*all_schemas.find(schema->name())->second, *schema_);
169+
}
170+
171+
TEST_CASE_METHOD(
172+
HandleLoadArraySchemaRequestFx,
173+
"tiledb_handle_load_array_schema_request - multiple schemas",
174+
"[request_handler][load_array_schema][schema-evolution]") {
175+
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);
176+
std::string load_enums = GENERATE("true", "false");
177+
178+
create_array();
179+
180+
std::vector<shared_ptr<ArraySchema>> all_schemas{schema_};
181+
all_schemas.push_back(schema_add_attribute("b"));
182+
all_schemas.push_back(schema_add_attribute("c"));
183+
all_schemas.push_back(schema_add_attribute("d"));
184+
185+
REQUIRE(cfg_.set("rest.load_enumerations_on_array_open", load_enums).ok());
186+
auto schema_response =
187+
call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
188+
auto schema = std::get<0>(schema_response);
189+
if (load_enums == "true") {
190+
REQUIRE(schema->has_enumeration("enmr"));
191+
REQUIRE(schema->get_loaded_enumeration_names().size() == 1);
192+
REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr");
193+
REQUIRE(schema->get_enumeration("enmr") != nullptr);
194+
}
195+
// The latest schema should be equal to the last applied evolution.
196+
tiledb::test::schema_equiv(*schema, *all_schemas.back());
197+
198+
// Validate schemas returned from the request in the order they were created.
199+
auto r_all_schemas = std::get<1>(schema_response);
200+
std::map<std::string, shared_ptr<ArraySchema>> resp(
201+
r_all_schemas.begin(), r_all_schemas.end());
202+
for (int i = 0; const auto& s : resp) {
203+
tiledb::test::schema_equiv(*s.second, *all_schemas[i++]);
204+
}
143205
}
144206

145207
TEST_CASE_METHOD(
@@ -346,7 +408,9 @@ TEST_CASE_METHOD(
346408
RequestHandlerFx::RequestHandlerFx(const std::string uri)
347409
: memory_tracker_(tiledb::test::create_test_memory_tracker())
348410
, uri_(uri)
349-
, ctx_(cfg_) {
411+
, ctx_(cfg_)
412+
, schema_(make_shared<ArraySchema>(
413+
ArrayType::DENSE, ctx_.resources().ephemeral_memory_tracker())) {
350414
delete_array();
351415
throw_if_not_ok(enc_key_.set_key(EncryptionType::NO_ENCRYPTION, nullptr, 0));
352416
}
@@ -405,9 +469,28 @@ HandleLoadArraySchemaRequestFx::create_string_enumeration(
405469
tiledb::test::create_test_memory_tracker());
406470
}
407471

472+
shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::schema_add_attribute(
473+
const std::string& attr_name) {
474+
tiledb::Context ctx;
475+
tiledb::ArraySchemaEvolution ase(ctx);
476+
auto attr = tiledb::Attribute::create<int32_t>(ctx, attr_name);
477+
ase.add_attribute(attr);
478+
// Evolve and update the original schema member variable.
479+
schema_ = ase.ptr()->array_schema_evolution_->evolve_schema(schema_);
480+
// Apply the schema evolution.
481+
Array::evolve_array_schema(
482+
this->ctx_.resources(),
483+
this->uri_,
484+
ase.ptr()->array_schema_evolution_,
485+
this->enc_key_);
486+
487+
// Return the new evolved schema for validation.
488+
return schema_;
489+
}
490+
408491
shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::create_schema() {
409492
// Create a schema to serialize
410-
auto schema =
493+
schema_ =
411494
make_shared<ArraySchema>(HERE(), ArrayType::SPARSE, memory_tracker_);
412495
auto dim =
413496
make_shared<Dimension>(HERE(), "dim1", Datatype::INT32, memory_tracker_);
@@ -416,20 +499,23 @@ shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::create_schema() {
416499

417500
auto dom = make_shared<Domain>(HERE(), memory_tracker_);
418501
throw_if_not_ok(dom->add_dimension(dim));
419-
throw_if_not_ok(schema->set_domain(dom));
502+
throw_if_not_ok(schema_->set_domain(dom));
420503

421504
std::vector<std::string> values = {"pig", "cow", "chicken", "dog", "cat"};
422505
auto enmr = create_string_enumeration("enmr", values);
423-
schema->add_enumeration(enmr);
506+
schema_->add_enumeration(enmr);
424507

425508
auto attr = make_shared<Attribute>(HERE(), "attr", Datatype::INT32);
426509
attr->set_enumeration_name("enmr");
427-
throw_if_not_ok(schema->add_attribute(attr));
510+
throw_if_not_ok(schema_->add_attribute(attr));
428511

429-
return schema;
512+
return schema_;
430513
}
431514

432-
shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::call_handler(
515+
std::tuple<
516+
shared_ptr<ArraySchema>,
517+
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
518+
HandleLoadArraySchemaRequestFx::call_handler(
433519
serialization::LoadArraySchemaRequest req, SerializationType stype) {
434520
// If this looks weird, it's because we're using the public C++ API to create
435521
// these objects instead of the internal APIs elsewhere in this test suite.
@@ -451,7 +537,7 @@ shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::call_handler(
451537
REQUIRE(rval == TILEDB_OK);
452538

453539
return serialization::deserialize_load_array_schema_response(
454-
stype, resp_buf->buffer(), memory_tracker_);
540+
uri_, stype, resp_buf->buffer(), memory_tracker_);
455541
}
456542

457543
shared_ptr<ArraySchema> HandleQueryPlanRequestFx::create_schema() {

test/support/src/helpers.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,6 +1621,26 @@ void read_sparse_v11(
16211621
tiledb_query_free(&query);
16221622
}
16231623

1624+
void schema_equiv(
1625+
const sm::ArraySchema& schema1, const sm::ArraySchema& schema2) {
1626+
CHECK(schema1.array_type() == schema2.array_type());
1627+
CHECK(schema1.attributes().size() == schema2.attributes().size());
1628+
for (unsigned int i = 0; i < schema2.attribute_num(); i++) {
1629+
auto a = schema1.attribute(i);
1630+
auto b = schema2.attribute(i);
1631+
CHECK(a->cell_val_num() == b->cell_val_num());
1632+
CHECK(a->name() == b->name());
1633+
CHECK(a->type() == b->type());
1634+
CHECK(a->nullable() == b->nullable());
1635+
CHECK(a->get_enumeration_name() == b->get_enumeration_name());
1636+
}
1637+
CHECK(schema1.capacity() == schema2.capacity());
1638+
CHECK(schema1.cell_order() == schema2.cell_order());
1639+
CHECK(schema1.tile_order() == schema2.tile_order());
1640+
CHECK(schema1.allows_dups() == schema2.allows_dups());
1641+
CHECK(schema1.array_uri().to_string() == schema2.array_uri().to_string());
1642+
}
1643+
16241644
template void check_subarray<int8_t>(
16251645
tiledb::sm::Subarray& subarray, const SubarrayRanges<int8_t>& ranges);
16261646
template void check_subarray<uint8_t>(

test/support/src/helpers.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -957,6 +957,15 @@ void write_sparse_v11(
957957
*/
958958
void read_sparse_v11(
959959
tiledb_ctx_t* ctx, const std::string& array_name, uint64_t timestamp);
960+
961+
/**
962+
* Helper function to test that two array schemas are equivalent.
963+
*
964+
* @param schema1 Expected array schema.
965+
* @param schema2 Actual array schema.
966+
*/
967+
void schema_equiv(
968+
const sm::ArraySchema& schema1, const sm::ArraySchema& schema2);
960969
} // namespace tiledb::test
961970

962971
#endif

0 commit comments

Comments
 (0)