Skip to content
This repository was archived by the owner on Jul 22, 2024. It is now read-only.

Commit 4b3b1bf

Browse files
Author: Ivan Gavryliuk
Commit message: Spark compatibility adjustments
1 parent (4d65464) · commit 4b3b1bf

File tree

5 files changed: 12 additions (+12) and 6 deletions (-6).

5 files changed

+12
-6
lines changed

src/Parquet.Test/MapsTest.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,15 @@ public void Simple_first_level_map_int_to_string()
1818
ds.Add(1, new Dictionary<int, string>
1919
{
2020
[1] = "one",
21-
[2] = "two"
21+
[2] = "two",
22+
[3] = "three"
2223
});
2324

2425
//ParquetWriter.WriteFile(ds, "c:\\tmp\\map.parquet");
2526

2627
DataSet ds1 = DataSetGenerator.WriteRead(ds);
2728

28-
Assert.Equal("{1;[1=>one;2=>two]}", ds1[0].ToString());
29+
Assert.Equal("{1;[1=>one;2=>two;3=>three]}", ds1[0].ToString());
2930
}
3031
}
3132
}

src/Parquet/File/ColumnWriter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ private List<PageTag> WriteValues(SchemaElement schema, IList values, Thrift.Pag
8080
ph.Data_page_header.Num_values = values.Count;
8181
}
8282

83-
if (schema.IsNullable || schema.MaxRepetitionLevel > 0)
83+
if (schema.IsNullable || schema.MaxDefinitionLevel > 0)
8484
{
8585
var dpack = new DefinitionPack(_schema, _formatOptions);
8686
values = dpack.Unpack(values, out definitions);

src/Parquet/File/FileMetadataBuilder.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ private static void AddMapSchema(List<TSchemaElement> container, SchemaElement s
101101
var root = new TSchemaElement(se.Name)
102102
{
103103
Converted_type = Thrift.ConvertedType.MAP,
104-
Num_children = 1
104+
Num_children = 1,
105+
Repetition_type = Thrift.FieldRepetitionType.OPTIONAL,
105106
};
106107
container.Add(root);
107108

src/Parquet/File/Values/RunLengthBitPackingHybridValuesWriter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ private static void WriteData(BinaryWriter writer, List<int> data, int bitWidth)
5858
{
5959
//for simplicity, we're only going to write RLE, however bitpacking needs to be implemented as well
6060

61-
const int maxCount = 0b0111_1111_1111_1111; //max count for an integer with one lost bit
61+
const int maxCount = int.MaxValue >> 1; //max count for an integer with one lost bit
6262

6363
//chunk identical values and write
6464
int lastValue = 0;

src/spark-experiments/src/main/scala/maps.sc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,8 @@ val df = sc.parallelize(Seq(
2525
df.printSchema
2626
df.show
2727

28-
write(df, "c:\\tmp\\sparkmap.parquet")
28+
//write(df, "c:\\tmp\\sparkmap.parquet")
29+
30+
val cp = spark.read.parquet("c:\\tmp\\map.parquet")
31+
cp.printSchema
32+
cp.show

0 commit comments

Comments
 (0)