Skip to content

Commit 6597467

Browse files
authored
apacheGH-42100: [C++][Parquet] ParquetFilePrinter::JSONPrint print length of FLBA (apache#41981)
### Rationale for this change

Print FLBA length in `ParquetFilePrinter::JSONPrint`

### What changes are included in this PR?

Print FLBA length in `ParquetFilePrinter::JSONPrint`

### Are these changes tested?

no need

### Are there any user-facing changes?

no

* GitHub Issue: apache#42100

Authored-by: mwish <[email protected]>
Signed-off-by: mwish <[email protected]>
1 parent 64b1109 commit 6597467

File tree

2 files changed

+40
-10
lines changed

2 files changed

+40
-10
lines changed

cpp/src/parquet/printer.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,8 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected
246246
const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
247247
stream << " { \"Id\": \"" << i << "\","
248248
<< " \"Name\": \"" << descr->path()->ToDotString() << "\","
249-
<< " \"PhysicalType\": \"" << TypeToString(descr->physical_type()) << "\","
249+
<< " \"PhysicalType\": \""
250+
<< TypeToString(descr->physical_type(), descr->type_length()) << "\","
250251
<< " \"ConvertedType\": \"" << ConvertedTypeToString(descr->converted_type())
251252
<< "\","
252253
<< " \"LogicalType\": " << (descr->logical_type())->ToJSON() << " }";

cpp/src/parquet/reader_test.cc

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,7 +1077,24 @@ Column 1
10771077
ASSERT_EQ(header_output + dump_output, ss_dump.str());
10781078
}
10791079

1080-
TEST(TestJSONWithLocalFile, JSONOutput) {
1080+
class TestJSONWithLocalFile : public ::testing::Test {
1081+
public:
1082+
static std::string ReadFromLocalFile(std::string_view local_file_name) {
1083+
std::stringstream ss;
1084+
// empty list means print all
1085+
std::list<int> columns;
1086+
1087+
auto reader =
1088+
ParquetFileReader::OpenFile(data_file(local_file_name.data()),
1089+
/*memory_map=*/false, default_reader_properties());
1090+
ParquetFilePrinter printer(reader.get());
1091+
printer.JSONPrint(ss, columns, local_file_name.data());
1092+
1093+
return ss.str();
1094+
}
1095+
};
1096+
1097+
TEST_F(TestJSONWithLocalFile, JSONOutput) {
10811098
std::string json_output = R"###({
10821099
"FileName": "alltypes_plain.parquet",
10831100
"Version": "1.0",
@@ -1131,16 +1148,28 @@ TEST(TestJSONWithLocalFile, JSONOutput) {
11311148
}
11321149
)###";
11331150

1134-
std::stringstream ss;
1135-
// empty list means print all
1136-
std::list<int> columns;
1151+
std::string json_content = ReadFromLocalFile("alltypes_plain.parquet");
1152+
ASSERT_EQ(json_output, json_content);
1153+
}
11371154

1138-
auto reader =
1139-
ParquetFileReader::OpenFile(alltypes_plain(), false, default_reader_properties());
1140-
ParquetFilePrinter printer(reader.get());
1141-
printer.JSONPrint(ss, columns, "alltypes_plain.parquet");
1155+
TEST_F(TestJSONWithLocalFile, JSONOutputFLBA) {
1156+
// min-max stats for FLBA contains non-utf8 output, so we don't check
1157+
// the whole json output.
1158+
std::string json_content = ReadFromLocalFile("fixed_length_byte_array.parquet");
1159+
1160+
std::string json_contains = R"###({
1161+
"FileName": "fixed_length_byte_array.parquet",
1162+
"Version": "1.0",
1163+
"CreatedBy": "parquet-mr version 1.13.0-SNAPSHOT (build d057b39d93014fe40f5067ee4a33621e65c91552)",
1164+
"TotalRows": "1000",
1165+
"NumberOfRowGroups": "1",
1166+
"NumberOfRealColumns": "1",
1167+
"NumberOfColumns": "1",
1168+
"Columns": [
1169+
{ "Id": "0", "Name": "flba_field", "PhysicalType": "FIXED_LEN_BYTE_ARRAY(4)", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} }
1170+
])###";
11421171

1143-
ASSERT_EQ(json_output, ss.str());
1172+
EXPECT_THAT(json_content, testing::HasSubstr(json_contains));
11441173
}
11451174

11461175
TEST(TestFileReader, BufferedReadsWithDictionary) {

0 commit comments

Comments
 (0)