Skip to content

Commit

Permalink
Fixed writing nullable parquet values that are marked in the schema a…
Browse files Browse the repository at this point in the history
…s optional (#1025)

* Fixed writing nullable parquet values that are marked in the schema as optional

* Fixed failing validation test

* Fixed reading empty chunks without snappy extension
  • Loading branch information
norberttech authored Mar 28, 2024
1 parent 4e3e08f commit 37a630b
Show file tree
Hide file tree
Showing 14 changed files with 222 additions and 135 deletions.
82 changes: 44 additions & 38 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ public function decodeBitPacked(BinaryReader $reader, int $bitWidth, int $varInt
$readBytes = $reader->readBytes(\min($remainingByteCount, $totalByteCount));
$actualByteCount = $readBytes->count();

if ($actualByteCount === 0) {
return;
}

$bitMask = (1 << $bitWidth) - 1;
$byteIndex = 0;
$currentByte = $readBytes[$byteIndex];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public function readDictionary(FlatColumn $column, PageHeader $pageHeader, Compr
(new Codec($this->options))
->decompress(
/** @phpstan-ignore-next-line */
\fread($stream, $pageHeader->compressedPageSize()),
$pageHeader->compressedPageSize() === 0 ? '' : \fread($stream, $pageHeader->compressedPageSize()),
$codec
),
$column,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
namespace Flow\Parquet\ParquetFile;

use Flow\Parquet\Data\DataConverter;
use Flow\Parquet\Exception\InvalidArgumentException;
use Flow\Parquet\ParquetFile\RowGroupBuilder\Validator\{ColumnDataValidator, DisabledValidator};
use Flow\Parquet\ParquetFile\RowGroupBuilder\{ColumnChunkBuilder, Flattener, PageSizeCalculator, RowGroupContainer, RowGroupStatistics};
use Flow\Parquet\{Option, Options};
Expand Down Expand Up @@ -49,9 +48,6 @@ public function addRow(array $row) : void
$flatRow = [];

foreach ($this->schema->columns() as $column) {
if (!\array_key_exists($column->name(), $row)) {
throw new InvalidArgumentException(\sprintf("Column '%s' not found in row", $column->name()));
}
$flatRow[] = $this->flattener->flattenColumn($column, $row);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public function flattenColumn(Column $column, array $row) : array
if (!\array_key_exists($column->name(), $row)) {
$this->validator->validate($column, null);

return [];
return [$column->name() => null];
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ public function isMap() : bool;

public function isMapElement() : bool;

/**
 * Reports whether the column is required.
 *
 * NOTE(review): the implementations shown alongside this declaration return
 * true whenever the column's repetition is not Repetition::OPTIONAL — confirm
 * that every implementer follows the same rule.
 */
public function isRequired() : bool;

public function isStruct() : bool;

public function isStructElement() : bool;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,11 @@ public function isMapElement() : bool
return false;
}

/**
 * Reports whether this column's repetition is anything other than
 * Repetition::OPTIONAL.
 *
 * Only an explicit OPTIONAL repetition yields false; every other value
 * held by the property yields true.
 */
public function isRequired() : bool
{
    $isOptional = $this->repetition === Repetition::OPTIONAL;

    return !$isOptional;
}

public function isStruct() : bool
{
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,11 @@ public function isMapElement() : bool
return false;
}

/**
 * Tells callers whether the column is non-optional.
 *
 * The answer is derived purely from the repetition property: it is false
 * when the repetition is Repetition::OPTIONAL and true for any other value.
 */
public function isRequired() : bool
{
    if ($this->repetition === Repetition::OPTIONAL) {
        return false;
    }

    return true;
}

public function isStruct() : bool
{
if ($this->isMap()) {
Expand Down
Loading

0 comments on commit 37a630b

Please sign in to comment.