Skip to content

Commit 3580718

Browse files
authored
Added possibility to set cache batch size (#1034)
* Added possibility to set cache batch size
* Static analyze fixes
* Fixed failing tests
1 parent 3343e03 commit 3580718

File tree

23 files changed

+60
-52
lines changed

23 files changed

+60
-52
lines changed

phpstan.neon

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
parameters:
22
level: 8
3+
treatPhpDocTypesAsCertain: false
34
checkGenericClassInNonGenericObjectType: false
45
checkMissingIterableValueType: false
56
paths:

psalm.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
</ignoreFiles>
3939
</projectFiles>
4040
<issueHandlers>
41+
<DocblockTypeContradiction errorLevel="suppress" />
4142
<RiskyTruthyFalsyComparison errorLevel="suppress" />
4243
<LessSpecificReturnStatement errorLevel="suppress" />
4344
<MoreSpecificReturnType errorLevel="suppress" />

src/adapter/etl-adapter-csv/src/Flow/ETL/Adapter/CSV/CSVDetector.php

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ final class CSVDetector
2626
*/
2727
public function __construct($resource, ?Option $fallback = new Option(',', '"', '\\'), ?Options $options = null)
2828
{
29-
/** @psalm-suppress DocblockTypeContradiction */
3029
if (!\is_resource($resource)) {
3130
throw new InvalidArgumentException('Argument must be a valid resource');
3231
}

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,7 @@ public function extract(FlowContext $context) : \Generator
4848
/**
4949
* @var array[] $values
5050
*
51-
* @psalm-suppress RedundantConditionGivenDocblockType, DocblockTypeContradiction
52-
*
53-
* @phpstan-ignore-next-line
51+
* @psalm-suppress RedundantConditionGivenDocblockType
5452
*/
5553
$values = $response->getValues() ?? [];
5654

@@ -116,9 +114,7 @@ function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
116114
/**
117115
* @var array[] $values
118116
*
119-
* @psalm-suppress RedundantConditionGivenDocblockType, DocblockTypeContradiction
120-
*
121-
* @phpstan-ignore-next-line
117+
* @psalm-suppress RedundantConditionGivenDocblockType
122118
*/
123119
$values = $response->getValues() ?? [];
124120
}

src/core/etl/src/Flow/ETL/Config.php

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace Flow\ETL;
66

7+
use Flow\ETL\Exception\InvalidArgumentException;
78
use Flow\ETL\Filesystem\FilesystemStreams;
89
use Flow\ETL\Pipeline\Optimizer;
910
use Flow\ETL\Row\EntryFactory;
@@ -19,6 +20,9 @@ final class Config
1920

2021
public const EXTERNAL_SORT_MAX_MEMORY_ENV = 'FLOW_EXTERNAL_SORT_MAX_MEMORY';
2122

23+
/**
24+
* @param int<1, max> $cacheBatchSize
25+
*/
2226
public function __construct(
2327
private readonly string $id,
2428
private readonly Serializer $serializer,
@@ -27,8 +31,12 @@ public function __construct(
2731
private readonly FilesystemStreams $filesystemStreams,
2832
private readonly Optimizer $optimizer,
2933
private readonly bool $putInputIntoRows,
30-
private readonly EntryFactory $entryFactory
34+
private readonly EntryFactory $entryFactory,
35+
private readonly int $cacheBatchSize
3136
) {
37+
if ($this->cacheBatchSize < 1) {
38+
throw new InvalidArgumentException('Cache batch size must be greater than 0');
39+
}
3240
}
3341

3442
public static function builder() : ConfigBuilder
@@ -46,6 +54,14 @@ public function cache() : Cache
4654
return $this->cache;
4755
}
4856

57+
/**
58+
* @return int<1, max>
59+
*/
60+
public function cacheBatchSize() : int
61+
{
62+
return $this->cacheBatchSize;
63+
}
64+
4965
public function entryFactory() : EntryFactory
5066
{
5167
return $this->entryFactory;

src/core/etl/src/Flow/ETL/ConfigBuilder.php

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,23 @@
55
namespace Flow\ETL;
66

77
use Flow\ETL\Cache\LocalFilesystemCache;
8+
use Flow\ETL\Exception\InvalidArgumentException;
89
use Flow\ETL\ExternalSort\MemorySort;
910
use Flow\ETL\Filesystem\{FilesystemStreams, LocalFilesystem};
1011
use Flow\ETL\Monitoring\Memory\Unit;
1112
use Flow\ETL\Pipeline\Optimizer;
1213
use Flow\ETL\Row\Factory\NativeEntryFactory;
13-
use Flow\Serializer\{CompressingSerializer, NativePHPSerializer, Serializer};
14+
use Flow\Serializer\{NativePHPSerializer, Serializer};
1415

1516
final class ConfigBuilder
1617
{
1718
private ?Cache $cache;
1819

20+
/**
21+
* @var int<1, max>
22+
*/
23+
private int $cacheBatchSize = 1000;
24+
1925
private ?ExternalSort $externalSort;
2026

2127
private ?Filesystem $filesystem;
@@ -46,7 +52,7 @@ public function build() : Config
4652
{
4753
$this->id ??= \uniqid('flow_php', true);
4854
$entryFactory = new NativeEntryFactory();
49-
$this->serializer ??= new CompressingSerializer(new NativePHPSerializer());
55+
$this->serializer ??= new NativePHPSerializer();
5056
$cachePath = \is_string(\getenv(Config::CACHE_DIR_ENV)) && \realpath(\getenv(Config::CACHE_DIR_ENV))
5157
? \getenv(Config::CACHE_DIR_ENV)
5258
: \sys_get_temp_dir() . '/flow_php/cache';
@@ -91,7 +97,8 @@ public function build() : Config
9197
new FilesystemStreams($this->filesystem),
9298
$this->optimizer,
9399
$this->putInputIntoRows,
94-
$entryFactory
100+
$entryFactory,
101+
$this->cacheBatchSize
95102
);
96103
}
97104

@@ -102,6 +109,20 @@ public function cache(Cache $cache) : self
102109
return $this;
103110
}
104111

112+
/**
113+
* @throws InvalidArgumentException
114+
*/
115+
public function cacheBatchSize(int $cacheBatchSize) : self
116+
{
117+
if ($cacheBatchSize < 1) {
118+
throw new InvalidArgumentException('Cache batch size must be greater than 0');
119+
}
120+
121+
$this->cacheBatchSize = $cacheBatchSize;
122+
123+
return $this;
124+
}
125+
105126
public function dontPutInputIntoRows() : self
106127
{
107128
$this->putInputIntoRows = false;

src/core/etl/src/Flow/ETL/DataFrame.php

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,16 @@ public function batchSize(int $size) : self
144144
* @lazy
145145
*
146146
* @param null|string $id
147+
*
148+
* @throws InvalidArgumentException
147149
*/
148-
public function cache(?string $id = null) : self
150+
public function cache(?string $id = null, ?int $cacheBatchSize = null) : self
149151
{
152+
if ($cacheBatchSize !== null && $cacheBatchSize < 1) {
153+
throw new InvalidArgumentException('Cache batch size must be greater than 0');
154+
}
155+
156+
$this->batchSize($cacheBatchSize ?? $this->context->config->cacheBatchSize());
150157
$this->pipeline = new CachingPipeline($this->pipeline, $id);
151158

152159
return $this;

src/core/etl/src/Flow/ETL/Join/Expression.php

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ public function __construct(
1818
}
1919

2020
/**
21-
* @psalm-suppress DocblockTypeContradiction
22-
*
2321
* @param array<string, string>|Comparison $comparison
2422
*/
2523
public static function on(array|Comparison $comparison, string $joinPrefix = '') : self

src/core/etl/src/Flow/ETL/Monitoring/Memory/Unit.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,13 @@ public static function fromString(string $memoryString) : self
4343

4444
switch (\strtoupper($unit)) {
4545
case 'K':
46+
case 'B':
4647
return self::fromKb((int) \substr($limit, 0, -1));
4748
case 'M':
49+
case 'MB':
4850
return self::fromMb((int) \substr($limit, 0, -1));
4951
case 'G':
52+
case 'GB':
5053
return self::fromGb((int) \substr($limit, 0, -1));
5154

5255
default:

src/core/etl/src/Flow/ETL/Pipeline/BatchingPipeline.php

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,6 @@ public function __construct(private readonly Pipeline $pipeline, private readonl
2222
{
2323
$this->nextPipeline = $pipeline->cleanCopy();
2424

25-
/**
26-
* @psalm-suppress DocblockTypeContradiction
27-
*
28-
* @phpstan-ignore-next-line
29-
*/
3025
if ($this->size <= 0) {
3126
throw new InvalidArgumentException('Batch size must be greater than 0, given: ' . $this->size);
3227
}

0 commit comments

Comments
 (0)