Skip to content
This repository was archived by the owner on Jul 22, 2024. It is now read-only.

Commit 79c0a3c

Browse files
author
Ivan Gavryliuk
committed
vnext stuff
1 parent e69e21e commit 79c0a3c

20 files changed

+116
-112
lines changed

src/Parquet/Data/EmulationMode.cs

-13
This file was deleted.

src/Parquet/Data/Schema/SchemaElement.cs

+16-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using System.Collections.Generic;
44
using System.Collections;
55
using System.Diagnostics;
6+
using Parquet.DataTypes;
67

78
namespace Parquet.Data
89
{
@@ -42,9 +43,22 @@ public class SchemaElement : IEquatable<SchemaElement>
4243
private readonly List<SchemaElement> _extra = new List<SchemaElement>();
4344
private string _path;
4445
private string _pathName;
45-
#pragma warning disable IDE1006 // Naming Styles
4646
private static readonly FileMetadataBuilder Builder = new FileMetadataBuilder();
47-
#pragma warning restore IDE1006 // Naming Styles
47+
48+
#region [ vNext ]
49+
50+
internal DataType DataType { get; private set; }
51+
52+
internal List<SchemaElement> NewChildren { get; private set; } = new List<SchemaElement>();
53+
54+
/// <summary>
/// Creates a schema element from a name, a data type and a parent element.
/// </summary>
/// <param name="name">Assigned to <c>Name</c>; must not be null.</param>
/// <param name="dataType">Assigned to the <c>DataType</c> property.</param>
/// <param name="parent">Assigned to <c>Parent</c> as-is; no validation is performed on it here.</param>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
internal SchemaElement(string name, DataType dataType, SchemaElement parent)
{
    // Reject a missing name before any state is assigned.
    if (name == null)
    {
        throw new ArgumentNullException(nameof(name));
    }

    Name = name;
    DataType = dataType;
    Parent = parent;
}
60+
61+
#endregion
4862

4963
/// <summary>
5064
/// Gets the children schemas. Made internal temporarily, until we can actually read nested structures.

src/Parquet/Data/Schema/SchemaElement2.cs

-36
This file was deleted.

src/Parquet/DataTypes/BasicDataType.cs

+16-9
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
using System.Collections.Generic;
1+
using System;
2+
using System.Collections;
3+
using System.Collections.Generic;
24
using Parquet.Data;
35

46
namespace Parquet.DataTypes
@@ -23,24 +25,29 @@ public virtual bool IsMatch(Thrift.SchemaElement tse, ParquetOptions formatOptio
2325
return tse.__isset.type && _thriftType == tse.Type;
2426
}
2527

26-
public SchemaElement2 Create(SchemaElement2 parent, IList<Thrift.SchemaElement> schema, ref int index)
28+
public SchemaElement Create(SchemaElement parent, IList<Thrift.SchemaElement> schema, ref int index)
2729
{
2830
Thrift.SchemaElement tse = schema[index++];
2931

3032
if(tse.Repetition_type == Thrift.FieldRepetitionType.REPEATED)
3133
{
32-
var list = new SchemaElement2(tse.Name, DataType.List, parent);
33-
parent.Children.Add(list);
34-
SchemaElement2 sei = CreateSimple(list, tse);
35-
list.Children.Add(sei);
34+
var list = new SchemaElement(tse.Name, DataType.List, parent);
35+
parent.NewChildren.Add(list);
36+
SchemaElement sei = CreateSimple(list, tse);
37+
list.NewChildren.Add(sei);
3638
return null;
3739
}
3840

39-
SchemaElement2 se = CreateSimple(parent, tse);
40-
parent.Children.Add(se);
41+
SchemaElement se = CreateSimple(parent, tse);
42+
parent.NewChildren.Add(se);
4143
return null;
4244
}
4345

44-
protected abstract SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse);
46+
protected abstract SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse);
47+
48+
/// <summary>
/// Base implementation of <c>Read</c>: it always throws, so a derived type must
/// override it before the method can be used.
/// </summary>
/// <param name="data">Not used by this base implementation.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public virtual IList Read(byte[] data) => throw new NotImplementedException();
4552
}
4653
}
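src/Parquet/DataTypes/BooleanDataType.cs (file header missing from this capture; path inferred from the class declared in the hunk below)

+32-5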
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,45 @@
1-
using System.Collections.Generic;
1+
using System.Collections;
2+
using System.Collections.Generic;
23
using Parquet.Data;
3-
using Parquet.Thrift;
44

55
namespace Parquet.DataTypes
66
{
77
class BooleanDataType : BasicDataType<bool>
88
{
9-
public BooleanDataType() : base(Type.BOOLEAN, null, 1)
9+
public BooleanDataType() : base(Thrift.Type.BOOLEAN, null, 1)
1010
{
1111
}
1212

13-
protected override SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse)
13+
protected override SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse)
1414
{
15-
return new SchemaElement2(tse.Name, DataType.Boolean, parent);
15+
return new SchemaElement(tse.Name, DataType.Boolean, parent);
16+
}
17+
18+
/// <summary>
/// Unpacks a bit-packed byte buffer into booleans, reading each byte from its
/// least-significant bit (bit 0) up to its most-significant bit (bit 7).
/// </summary>
/// <param name="data">Packed bytes; every byte contributes exactly eight values.</param>
/// <returns>
/// A <c>List&lt;bool&gt;</c> with eight entries per input byte, in byte order.
/// An empty buffer produces an empty list.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="data"/> is null.</exception>
public override IList Read(byte[] data)
{
    if (data == null)
    {
        // Fully qualified because this file does not import the System namespace.
        throw new System.ArgumentNullException(nameof(data));
    }

    var dest = new List<bool>();

    // Walking the bytes directly (instead of priming the loop with data[0]) lets an
    // empty buffer yield an empty list rather than fail on the first index.
    foreach (byte b in data)
    {
        for (int ibit = 0; ibit < 8; ibit++)
        {
            dest.Add(((b >> ibit) & 1) == 1);
        }
    }

    // Note: all eight bits of every byte are emitted, including any unused trailing bits.
    return dest;
}
1744
}
1845
}

src/Parquet/DataTypes/ByteArrayDataType.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ public ByteArrayDataType() : base(Thrift.Type.BYTE_ARRAY)
88
{
99
}
1010

11-
protected override SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse)
11+
protected override SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse)
1212
{
13-
return new SchemaElement2(tse.Name, DataType.ByteArray, parent);
13+
return new SchemaElement(tse.Name, DataType.ByteArray, parent);
1414
}
1515
}
1616
}

src/Parquet/DataTypes/DateTimeOffsetDataType.cs

+4-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
using System.Collections.Generic;
33
using System.Text;
44
using Parquet.Data;
5-
using Parquet.Thrift;
65

76
namespace Parquet.DataTypes
87
{
@@ -19,14 +18,14 @@ public override bool IsMatch(Thrift.SchemaElement tse, ParquetOptions formatOpti
1918

2019
tse.Type == Thrift.Type.INT96 || //Impala
2120

22-
(tse.Type == Thrift.Type.INT64 && tse.__isset.converted_type && tse.Converted_type == ConvertedType.TIMESTAMP_MILLIS) ||
21+
(tse.Type == Thrift.Type.INT64 && tse.__isset.converted_type && tse.Converted_type == Thrift.ConvertedType.TIMESTAMP_MILLIS) ||
2322

24-
(tse.Type == Thrift.Type.INT32 && tse.__isset.converted_type && tse.Converted_type == ConvertedType.DATE);
23+
(tse.Type == Thrift.Type.INT32 && tse.__isset.converted_type && tse.Converted_type == Thrift.ConvertedType.DATE);
2524
}
2625

27-
protected override SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse)
26+
protected override SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse)
2827
{
29-
return new SchemaElement2(tse.Name, DataType.DateTimeOffset, parent);
28+
return new SchemaElement(tse.Name, DataType.DateTimeOffset, parent);
3029
}
3130
}
3231
}

src/Parquet/DataTypes/DecimalDataType.cs

+4-5
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,20 @@
22
using System.Collections.Generic;
33
using System.Text;
44
using Parquet.Data;
5-
using Parquet.Thrift;
65

76
namespace Parquet.DataTypes
87
{
98
class DecimalDataType : BasicDataType<decimal>
109
{
11-
public DecimalDataType() : base(Thrift.Type.FIXED_LEN_BYTE_ARRAY, ConvertedType.DECIMAL)
10+
public DecimalDataType() : base(Thrift.Type.FIXED_LEN_BYTE_ARRAY, Thrift.ConvertedType.DECIMAL)
1211
{
1312
}
1413

1514
public override bool IsMatch(Thrift.SchemaElement tse, ParquetOptions formatOptions)
1615
{
1716
return
1817

19-
tse.__isset.converted_type && tse.Converted_type == ConvertedType.DECIMAL &&
18+
tse.__isset.converted_type && tse.Converted_type == Thrift.ConvertedType.DECIMAL &&
2019

2120
(
2221
tse.Type == Thrift.Type.FIXED_LEN_BYTE_ARRAY ||
@@ -25,9 +24,9 @@ public override bool IsMatch(Thrift.SchemaElement tse, ParquetOptions formatOpti
2524
);
2625
}
2726

28-
protected override SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse)
27+
protected override SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse)
2928
{
30-
return new SchemaElement2(tse.Name, DataType.Decimal, parent);
29+
return new SchemaElement(tse.Name, DataType.Decimal, parent);
3130
}
3231
}
3332
}

src/Parquet/DataTypes/DoubleDataType.cs

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
using System.Collections.Generic;
33
using System.Text;
44
using Parquet.Data;
5-
using Parquet.Thrift;
65

76
namespace Parquet.DataTypes
87
{
@@ -13,9 +12,9 @@ public DoubleDataType() : base(Thrift.Type.DOUBLE)
1312

1413
}
1514

16-
protected override SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse)
15+
protected override SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse)
1716
{
18-
return new SchemaElement2(tse.Name, DataType.Double, parent);
17+
return new SchemaElement(tse.Name, DataType.Double, parent);
1918
}
2019
}
2120
}

src/Parquet/DataTypes/FloatDataType.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ public FloatDataType() : base(Thrift.Type.FLOAT)
88
{
99
}
1010

11-
protected override SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse)
11+
protected override SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse)
1212
{
13-
return new SchemaElement2(tse.Name, DataType.Float, parent);
13+
return new SchemaElement(tse.Name, DataType.Float, parent);
1414
}
1515
}
1616
}

src/Parquet/DataTypes/IDataType.cs

+3-1
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ interface IDataType
1111
{
1212
bool IsMatch(Thrift.SchemaElement tse, ParquetOptions formatOptions);
1313

14-
SchemaElement2 Create(SchemaElement2 parent, IList<Thrift.SchemaElement> schema, ref int index);
14+
SchemaElement Create(SchemaElement parent, IList<Thrift.SchemaElement> schema, ref int index);
1515

1616
int? BitWidth { get; }
17+
18+
IList Read(byte[] data);
1719
}
1820
}

src/Parquet/DataTypes/Int32DataType.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ public Int32DataType() : base(Thrift.Type.INT32, null, 32)
88
{
99
}
1010

11-
protected override SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse)
11+
protected override SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse)
1212
{
13-
return new SchemaElement2(tse.Name, DataType.Int32, parent);
13+
return new SchemaElement(tse.Name, DataType.Int32, parent);
1414
}
1515
}
1616
}

src/Parquet/DataTypes/Int64DataType.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ public Int64DataType() : base(Thrift.Type.INT64)
88
{
99
}
1010

11-
protected override SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse)
11+
protected override SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse)
1212
{
13-
return new SchemaElement2(tse.Name, DataType.Int64, parent);
13+
return new SchemaElement(tse.Name, DataType.Int64, parent);
1414
}
1515
}
1616
}

src/Parquet/DataTypes/IntervalDataType.cs

+3-4
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,19 @@
33
using System.Text;
44
using Parquet.Data;
55
using Parquet.File.Values.Primitives;
6-
using Parquet.Thrift;
76

87
namespace Parquet.DataTypes
98
{
109
class IntervalDataType : BasicDataType<Interval>
1110
{
12-
public IntervalDataType() : base(Thrift.Type.FIXED_LEN_BYTE_ARRAY, ConvertedType.INTERVAL)
11+
public IntervalDataType() : base(Thrift.Type.FIXED_LEN_BYTE_ARRAY, Thrift.ConvertedType.INTERVAL)
1312
{
1413

1514
}
1615

17-
protected override SchemaElement2 CreateSimple(SchemaElement2 parent, Thrift.SchemaElement tse)
16+
protected override SchemaElement CreateSimple(SchemaElement parent, Thrift.SchemaElement tse)
1817
{
19-
return new SchemaElement2(tse.Name, DataType.Interval, parent);
18+
return new SchemaElement(tse.Name, DataType.Interval, parent);
2019
}
2120
}
2221
}

src/Parquet/DataTypes/ListDataType.cs

+10-5
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
using System;
2+
using System.Collections;
23
using System.Collections.Generic;
34
using System.Text;
45
using Parquet.Data;
5-
using Parquet.Thrift;
66

77
namespace Parquet.DataTypes
88
{
99
class ListDataType : IDataType
1010
{
1111
public int? BitWidth => null;
1212

13-
public SchemaElement2 Create(SchemaElement2 parent, IList<Thrift.SchemaElement> schema, ref int index)
13+
public SchemaElement Create(SchemaElement parent, IList<Thrift.SchemaElement> schema, ref int index)
1414
{
15-
var list = new SchemaElement2(schema[index].Name, DataType.List, parent);
16-
parent.Children.Add(list);
15+
var list = new SchemaElement(schema[index].Name, DataType.List, parent);
16+
parent.NewChildren.Add(list);
1717

1818
//skip this element and child container
1919
index += 2;
@@ -23,7 +23,12 @@ public SchemaElement2 Create(SchemaElement2 parent, IList<Thrift.SchemaElement>
2323

2424
public bool IsMatch(Thrift.SchemaElement tse, ParquetOptions formatOptions)
2525
{
26-
return tse.__isset.converted_type && tse.Converted_type == ConvertedType.LIST;
26+
return tse.__isset.converted_type && tse.Converted_type == Thrift.ConvertedType.LIST;
27+
}
28+
29+
/// <summary>
/// Not implemented for this data type: every call throws.
/// </summary>
/// <param name="data">Not used.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public IList Read(byte[] data) => throw new NotImplementedException();
2833
}
2934
}

0 commit comments

Comments
 (0)