Skip to content

Commit

Permalink
Handle enums with too many values (#366)
Browse files Browse the repository at this point in the history
* Handle enums with too many values

* Add log info message

* Change limit calculation
  • Loading branch information
DimitrisStaratzis authored Aug 26, 2024
1 parent 1f58377 commit 0873f40
Show file tree
Hide file tree
Showing 20 changed files with 54 additions and 14 deletions.
11 changes: 11 additions & 0 deletions mysql-test/mytile/r/enum.result
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,14 @@ select `dim1`, `attr1`, `fruit` from tiledb_with_enum where fruit = 'pear';
dim1 attr1 fruit
1 2 pear
DROP TABLE tiledb_with_enum;
# Test enum with too many values. Reverts to non-enum. Affected attribute `gene_symbol`
CREATE TABLE var ENGINE=mytile uri='MTR_SUITE_DIR/test_data/tabula-sapiens-immune-var';;
describe var;
Field Type Null Key Default Extra
soma_joinid bigint(20) NO PRI NULL
ensemblid text NO NULL
feature_type enum('Gene Expression') NO NULL
gene_symbol int(11) NO NULL
var_id text NO NULL
SET mytile_delete_arrays=0;
DROP TABLE var;
9 changes: 8 additions & 1 deletion mysql-test/mytile/t/enum.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,11 @@ WHERE table_name = 'tiledb_with_enum' order by column_name;

select `dim1`, `attr1`, `fruit` from tiledb_with_enum order by `dim1`;
select `dim1`, `attr1`, `fruit` from tiledb_with_enum where fruit = 'pear';
DROP TABLE tiledb_with_enum;
DROP TABLE tiledb_with_enum;

--echo # Test enum with too many values. Reverts to non-enum. Affected attribute `gene_symbol`
--replace_result $MTR_SUITE_DIR MTR_SUITE_DIR
--eval CREATE TABLE var ENGINE=mytile uri='$MTR_SUITE_DIR/test_data/tabula-sapiens-immune-var';
describe var;
SET mytile_delete_arrays=0;
DROP TABLE var;
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
48 changes: 35 additions & 13 deletions mytile/mytile-discovery.cc
Original file line number Diff line number Diff line change
Expand Up @@ -324,33 +324,55 @@ int tile::discover_array(THD *thd, TABLE_SHARE *ts, HA_CREATE_INFO *info) {
TileDBTypeToMysqlType(attribute.type(), attribute.cell_size() > 1,
attribute.cell_val_num());

// Handle enums
size_t enum_values = 0;
std::stringstream enum_string;
bool empty_enum = false;
// First, determine whether the enum has too many values to be used as an ENUM

if (is_enum) {
// if the attribute has an enum
// if the attribute has an enum, calculate its enum values
auto enmr = tiledb::ArrayExperimental::get_enumeration(
ctx, array, enmr_name.value());

auto enum_vec_string = enmr.as_vector<std::string>();
// store enum values
enum_values = enum_vec_string.size();

if (enum_vec_string.size() == 0) {
empty_enum = true;
}

if (enum_vec_string.size() != 0) {
sql_string << "ENUM"
<< "(";
for (size_t i = 0; i < enum_vec_string.size(); ++i) {
sql_string << "'" << enum_vec_string[i] << "'";
if (i < enum_vec_string.size() - 1) {
sql_string << ", ";
}
enum_string << "ENUM" << "(";
for (size_t i = 0; i < enum_vec_string.size(); ++i) {
enum_string << "'" << enum_vec_string[i] << "'";
if (i < enum_vec_string.size() - 1) {
enum_string << ", ";
}
sql_string << ")";
} else {
sql_string << MysqlTypeString(mysql_type);
}
} else {
enum_string << ")";
}

// bool to check if enum is oversized and we should use its plain type
bool over_sized_enum = enum_string.str().size() > (65536 / schema->attribute_num());

if (is_enum && !over_sized_enum && !empty_enum) {
// if the attribute has an enum and the enum values are not too many
sql_string << enum_string.str();
} else { // if not a usable enum continue as normal and use plain type
if (is_enum && over_sized_enum) {
std::string logMessage =
"Attribute " + attribute.name() +
" has too many enum values. Mytile is using its base type";
log_info(thd, logMessage.c_str());
}
if (mysql_type == MYSQL_TYPE_VARCHAR) {
sql_string << "TEXT";
} else {
sql_string << MysqlTypeString(mysql_type);
}
}

if (!MysqlBlobType(enum_field_types(mysql_type)) &&
TileDBTypeIsUnsigned(attribute.type()))
sql_string << " UNSIGNED";
Expand Down

0 comments on commit 0873f40

Please sign in to comment.