Skip to content

Commit e35478a

Browse files
authored
Fixed schema inferring when first rows are null (#1274)
* Fixed schema inferring when first rows are null * Static analysis * fixed failing tests * Regenerated DSL functions definition for documentation
1 parent 2c5407f commit e35478a

File tree

55 files changed

+516
-274
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+516
-274
lines changed

src/adapter/etl-adapter-elasticsearch/src/Flow/ETL/Adapter/Elasticsearch/EntryIdFactory/HashIdFactory.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace Flow\ETL\Adapter\Elasticsearch\EntryIdFactory;
66

7+
use function Flow\ETL\DSL\string_entry;
78
use Flow\ETL\Adapter\Elasticsearch\IdFactory;
89
use Flow\ETL\Hash\{Algorithm, NativePHPHash};
910
use Flow\ETL\Row;
@@ -26,7 +27,7 @@ public function __construct(string ...$entryNames)
2627

2728
public function create(Row $row) : Entry
2829
{
29-
return new Entry\StringEntry(
30+
return string_entry(
3031
'id',
3132
$this->hashAlgorithm->hash(
3233
\implode(':', \array_map(fn (string $name) : string => (string) $row->valueOf($name), $this->entryNames))

src/adapter/etl-adapter-elasticsearch/tests/Flow/ETL/Adapter/Elasticsearch/Tests/Integration/ElasticsearchPHP/ElasticsearchExtractorTest.php

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
namespace Flow\ETL\Adapter\Elasticsearch\Tests\Integration\ElasticsearchPHP;
66

77
use function Flow\ETL\Adapter\Elasticsearch\{es_hits_to_rows, from_es, to_es_bulk_index};
8-
use function Flow\ETL\DSL\{df, generate_random_int};
8+
use function Flow\ETL\DSL\{bool_entry, df, generate_random_int, int_entry, string_entry};
99
use Flow\ETL\Adapter\Elasticsearch\ElasticsearchPHP\DocumentDataSource;
1010
use Flow\ETL\Adapter\Elasticsearch\EntryIdFactory\EntryIdFactory;
1111
use Flow\ETL\Adapter\Elasticsearch\Tests\Integration\TestCase;
@@ -37,10 +37,10 @@ public function test_empty_extraction() : void
3737
$loader->load(new Rows(
3838
...\array_map(
3939
static fn (int $i) : Row => Row::create(
40-
new Row\Entry\StringEntry('id', \sha1((string) $i)),
41-
new Row\Entry\IntegerEntry('position', $i),
42-
new Row\Entry\StringEntry('name', 'id_' . $i),
43-
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
40+
string_entry('id', \sha1((string) $i)),
41+
int_entry('position', $i),
42+
string_entry('name', 'id_' . $i),
43+
bool_entry('active', (bool) generate_random_int(0, 1))
4444
),
4545
\range(1, 100)
4646
),
@@ -77,10 +77,10 @@ public function test_extraction_index_with_from_and_size() : void
7777
$loader->load(new Rows(
7878
...\array_map(
7979
static fn (int $i) : Row => Row::create(
80-
new Row\Entry\StringEntry('id', \sha1((string) $i)),
81-
new Row\Entry\IntegerEntry('position', $i),
82-
new Row\Entry\StringEntry('name', 'id_' . $i),
83-
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
80+
string_entry('id', \sha1((string) $i)),
81+
int_entry('position', $i),
82+
string_entry('name', 'id_' . $i),
83+
bool_entry('active', (bool) generate_random_int(0, 1))
8484
),
8585
\range(1, 2000)
8686
),
@@ -120,10 +120,10 @@ public function test_extraction_index_with_search_after() : void
120120
$loader->load(new Rows(
121121
...\array_map(
122122
static fn (int $i) : Row => Row::create(
123-
new Row\Entry\StringEntry('id', \sha1((string) $i)),
124-
new Row\Entry\IntegerEntry('position', $i),
125-
new Row\Entry\StringEntry('name', 'id_' . $i),
126-
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
123+
string_entry('id', \sha1((string) $i)),
124+
int_entry('position', $i),
125+
string_entry('name', 'id_' . $i),
126+
bool_entry('active', (bool) generate_random_int(0, 1))
127127
),
128128
\range(1, 2005)
129129
),
@@ -156,10 +156,10 @@ public function test_extraction_index_with_search_after_with_point_in_time() : v
156156
$loader->load(new Rows(
157157
...\array_map(
158158
static fn (int $i) : Row => Row::create(
159-
new Row\Entry\StringEntry('id', \sha1((string) $i)),
160-
new Row\Entry\IntegerEntry('position', $i),
161-
new Row\Entry\StringEntry('name', 'id_' . $i),
162-
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
159+
string_entry('id', \sha1((string) $i)),
160+
int_entry('position', $i),
161+
string_entry('name', 'id_' . $i),
162+
bool_entry('active', (bool) generate_random_int(0, 1))
163163
),
164164
\range(1, 2005)
165165
),
@@ -197,10 +197,10 @@ public function test_extraction_whole_index_with_point_in_time() : void
197197
$loader->load(new Rows(
198198
...\array_map(
199199
static fn (int $i) : Row => Row::create(
200-
new Row\Entry\StringEntry('id', \sha1((string) $i)),
201-
new Row\Entry\IntegerEntry('position', $i),
202-
new Row\Entry\StringEntry('name', 'id_' . $i),
203-
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
200+
string_entry('id', \sha1((string) $i)),
201+
int_entry('position', $i),
202+
string_entry('name', 'id_' . $i),
203+
bool_entry('active', (bool) generate_random_int(0, 1))
204204
),
205205
\range(1, 2005)
206206
),

src/adapter/etl-adapter-elasticsearch/tests/Flow/ETL/Adapter/Elasticsearch/Tests/Integration/ElasticsearchPHP/ElasticsearchLoaderTest.php

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
namespace Flow\ETL\Adapter\Elasticsearch\Tests\Integration\ElasticsearchPHP;
66

77
use function Flow\ETL\Adapter\Elasticsearch\{entry_id_factory, hash_id_factory, to_es_bulk_index, to_es_bulk_update};
8-
use function Flow\ETL\DSL\generate_random_string;
8+
use function Flow\ETL\DSL\{generate_random_string, string_entry};
99
use Flow\ETL\Adapter\Elasticsearch\Tests\Integration\TestCase;
1010
use Flow\ETL\{Config, FlowContext, Row, Rows};
1111

@@ -53,20 +53,20 @@ public function test_integration_with_entry_factory() : void
5353

5454
$loader->load(new Rows(
5555
Row::create(
56-
new Row\Entry\StringEntry('id', \sha1('id' . generate_random_string())),
57-
new Row\Entry\StringEntry('name', 'Łukasz')
56+
string_entry('id', \sha1('id' . generate_random_string())),
57+
string_entry('name', 'Łukasz')
5858
),
5959
Row::create(
60-
new Row\Entry\StringEntry('id', \sha1('id' . generate_random_string())),
61-
new Row\Entry\StringEntry('name', 'Norbert')
60+
string_entry('id', \sha1('id' . generate_random_string())),
61+
string_entry('name', 'Norbert')
6262
),
6363
Row::create(
64-
new Row\Entry\StringEntry('id', \sha1('id' . generate_random_string())),
65-
new Row\Entry\StringEntry('name', 'Dawid')
64+
string_entry('id', \sha1('id' . generate_random_string())),
65+
string_entry('name', 'Dawid')
6666
),
6767
Row::create(
68-
new Row\Entry\StringEntry('id', \sha1('id' . generate_random_string())),
69-
new Row\Entry\StringEntry('name', 'Tomek')
68+
string_entry('id', \sha1('id' . generate_random_string())),
69+
string_entry('name', 'Tomek')
7070
),
7171
), new FlowContext(Config::default()));
7272

@@ -125,8 +125,8 @@ public function test_integration_with_partial_update_id_factory() : void
125125
$insertLoader->load(new Rows(
126126
Row::create(
127127
new Row\Entry\IntegerEntry('id', 1),
128-
new Row\Entry\StringEntry('name', 'Some Name'),
129-
new Row\Entry\StringEntry('status', 'NEW'),
128+
string_entry('name', 'Some Name'),
129+
string_entry('status', 'NEW'),
130130
new Row\Entry\DateTimeEntry('updated_at', new \DateTimeImmutable('2022-01-01 00:00:00'))
131131
),
132132
), new FlowContext(Config::default()));
@@ -136,7 +136,7 @@ public function test_integration_with_partial_update_id_factory() : void
136136
$updateLoader->load(new Rows(
137137
Row::create(
138138
new Row\Entry\IntegerEntry('id', 1),
139-
new Row\Entry\StringEntry('name', 'Other Name'),
139+
string_entry('name', 'Other Name'),
140140
),
141141
), new FlowContext(Config::default()));
142142

@@ -179,19 +179,19 @@ public function test_integration_with_sha1_id_factory() : void
179179
$loader->load(new Rows(
180180
Row::create(
181181
new Row\Entry\IntegerEntry('id', 1),
182-
new Row\Entry\StringEntry('name', 'Łukasz')
182+
string_entry('name', 'Łukasz')
183183
),
184184
Row::create(
185185
new Row\Entry\IntegerEntry('id', 2),
186-
new Row\Entry\StringEntry('name', 'Norbert')
186+
string_entry('name', 'Norbert')
187187
),
188188
Row::create(
189189
new Row\Entry\IntegerEntry('id', 3),
190-
new Row\Entry\StringEntry('name', 'Dawid')
190+
string_entry('name', 'Dawid')
191191
),
192192
Row::create(
193193
new Row\Entry\IntegerEntry('id', 4),
194-
new Row\Entry\StringEntry('name', 'Tomek')
194+
string_entry('name', 'Tomek')
195195
),
196196
), new FlowContext(Config::default()));
197197

src/adapter/etl-adapter-elasticsearch/tests/Flow/ETL/Adapter/Elasticsearch/Tests/Integration/ElasticsearchPHP/IntegrationTest.php

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
namespace Flow\ETL\Adapter\Elasticsearch\Tests\Integration\ElasticsearchPHP;
66

77
use function Flow\ETL\Adapter\Elasticsearch\{es_hits_to_rows, from_es, to_es_bulk_index};
8+
use function Flow\ETL\DSL\{bool_entry, int_entry, string_entry};
89
use Flow\ETL\Adapter\Elasticsearch\EntryIdFactory\EntryIdFactory;
910
use Flow\ETL\Adapter\Elasticsearch\Tests\Integration\TestCase;
1011
use Flow\ETL\{Flow, Row, Rows};
@@ -39,10 +40,10 @@ public function test_loading_and_extraction_with_limit_and_transformation() : vo
3940
new Rows(
4041
...\array_map(
4142
static fn (int $i) : Row => Row::create(
42-
new Row\Entry\StringEntry('id', \sha1((string) $i)),
43-
new Row\Entry\IntegerEntry('position', $i),
44-
new Row\Entry\StringEntry('name', 'id_' . $i),
45-
new Row\Entry\BooleanEntry('active', false)
43+
string_entry('id', \sha1((string) $i)),
44+
int_entry('position', $i),
45+
string_entry('name', 'id_' . $i),
46+
bool_entry('active', false)
4647
),
4748
\range(1, 2005)
4849
),

src/adapter/etl-adapter-elasticsearch/tests/Flow/ETL/Adapter/Elasticsearch/Tests/Unit/EntryIdFactory/HashIdFactoryTest.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
namespace Flow\ETL\Adapter\Elasticsearch\Tests\Unit\EntryIdFactory;
66

7-
use function Flow\ETL\DSL\str_entry;
7+
use function Flow\ETL\DSL\{str_entry, string_entry};
88
use Flow\ETL\Adapter\Elasticsearch\EntryIdFactory\HashIdFactory;
99
use Flow\ETL\Hash\NativePHPHash;
1010
use Flow\ETL\Row;
@@ -17,7 +17,7 @@ public function test_create_row() : void
1717
$factory = new HashIdFactory('first_name', 'last_name');
1818

1919
self::assertEquals(
20-
new Row\Entry\StringEntry(
20+
string_entry(
2121
'id',
2222
\hash('xxh128', 'John:Doe')
2323
),
@@ -32,7 +32,7 @@ public function test_create_row_with_different_hash() : void
3232
$factory = (new HashIdFactory('first_name', 'last_name'))->withAlgorithm(new NativePHPHash('sha1'));
3333

3434
self::assertEquals(
35-
new Row\Entry\StringEntry(
35+
string_entry(
3636
'id',
3737
\sha1('John:Doe')
3838
),

src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Unit/GoogleSheetExtractorTest.php

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
namespace Flow\ETL\Adapter\GoogleSheet\Tests\Unit;
66

77
use function Flow\ETL\Adapter\GoogleSheet\from_google_sheet_columns;
8-
use function Flow\ETL\DSL\str_entry;
8+
use function Flow\ETL\DSL\{str_entry, string_entry};
99
use Flow\ETL\Exception\InvalidArgumentException;
10-
use Flow\ETL\Row\Entry\StringEntry;
1110
use Flow\ETL\{Config\ConfigBuilder, FlowContext, Row, Rows};
1211
use Google\Service\Sheets;
1312
use Google\Service\Sheets\Resource\SpreadsheetsValues;
@@ -26,8 +25,8 @@ public function test_its_stop_fetching_data_if_processed_row_count_is_less_then_
2625
true,
2726
2,
2827
);
29-
$spreadSheetIdEntry = new StringEntry('_spread_sheet_id', $spreadSheetId);
30-
$sheetNameEntry = new StringEntry('_sheet_name', $sheetName);
28+
$spreadSheetIdEntry = string_entry('_spread_sheet_id', $spreadSheetId);
29+
$sheetNameEntry = string_entry('_sheet_name', $sheetName);
3130
$firstValueRangeMock = $this->createMock(Sheets\ValueRange::class);
3231
$firstValueRangeMock->method('getValues')->willReturn([
3332
['header'],

src/adapter/etl-adapter-http/src/Flow/ETL/Adapter/Http/RequestEntriesFactory.php

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace Flow\ETL\Adapter\Http;
66

7+
use function Flow\ETL\DSL\string_entry;
78
use Flow\ETL\Exception\InvalidArgumentException;
89
use Flow\ETL\Row;
910
use Psr\Http\Message\RequestInterface;
@@ -38,7 +39,7 @@ public function create(RequestInterface $request) : Row\Entries
3839
}
3940
}
4041

41-
$requestBodyEntry = new Row\Entry\StringEntry('request_body', null);
42+
$requestBodyEntry = string_entry('request_body', null);
4243
$requestBody = $request->getBody();
4344

4445
if ($requestBody->isReadable()) {
@@ -58,13 +59,13 @@ public function create(RequestInterface $request) : Row\Entries
5859
if (\class_exists(Row\Entry\JsonEntry::class)) {
5960
$requestBodyEntry = new Row\Entry\JsonEntry('request_body', (array) \json_decode($requestBodyContent, true, 512, JSON_THROW_ON_ERROR));
6061
} else {
61-
$requestBodyEntry = new Row\Entry\StringEntry('request_body', $requestBodyContent);
62+
$requestBodyEntry = string_entry('request_body', $requestBodyContent);
6263
}
6364

6465
break;
6566

6667
default:
67-
$requestBodyEntry = new Row\Entry\StringEntry('request_body', $requestBodyContent);
68+
$requestBodyEntry = string_entry('request_body', $requestBodyContent);
6869

6970
break;
7071
}
@@ -73,10 +74,10 @@ public function create(RequestInterface $request) : Row\Entries
7374

7475
return new Row\Entries(
7576
$requestBodyEntry,
76-
new Row\Entry\StringEntry('request_uri', (string) $request->getUri()),
77+
string_entry('request_uri', (string) $request->getUri()),
7778
new Row\Entry\JsonEntry('request_headers', $request->getHeaders()),
78-
new Row\Entry\StringEntry('request_protocol_version', $request->getProtocolVersion()),
79-
new Row\Entry\StringEntry('request_method', $request->getMethod()),
79+
string_entry('request_protocol_version', $request->getProtocolVersion()),
80+
string_entry('request_method', $request->getMethod()),
8081
);
8182
}
8283
}

src/adapter/etl-adapter-http/src/Flow/ETL/Adapter/Http/ResponseEntriesFactory.php

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace Flow\ETL\Adapter\Http;
66

7+
use function Flow\ETL\DSL\string_entry;
78
use Flow\ETL\Exception\InvalidArgumentException;
89
use Flow\ETL\Row;
910
use Psr\Http\Message\ResponseInterface;
@@ -48,26 +49,26 @@ public function create(ResponseInterface $response) : Row\Entries
4849
if (\class_exists(Row\Entry\JsonEntry::class)) {
4950
$responseBodyEntry = new Row\Entry\JsonEntry('response_body', (array) \json_decode($responseBodyContent, true, 512, JSON_THROW_ON_ERROR));
5051
} else {
51-
$responseBodyEntry = new Row\Entry\StringEntry('response_body', $responseBodyContent);
52+
$responseBodyEntry = string_entry('response_body', $responseBodyContent);
5253
}
5354

5455
break;
5556

5657
default:
57-
$responseBodyEntry = new Row\Entry\StringEntry('response_body', $responseBodyContent);
58+
$responseBodyEntry = string_entry('response_body', $responseBodyContent);
5859

5960
break;
6061
}
6162
} else {
62-
$responseBodyEntry = new Row\Entry\StringEntry('response_body', null);
63+
$responseBodyEntry = string_entry('response_body', null);
6364
}
6465

6566
return new Row\Entries(
6667
$responseBodyEntry,
6768
new Row\Entry\JsonEntry('response_headers', $response->getHeaders()),
6869
new Row\Entry\IntegerEntry('response_status_code', $response->getStatusCode()),
69-
new Row\Entry\StringEntry('response_protocol_version', $response->getProtocolVersion()),
70-
new Row\Entry\StringEntry('response_reason_phrase', $response->getReasonPhrase()),
70+
string_entry('response_protocol_version', $response->getProtocolVersion()),
71+
string_entry('response_reason_phrase', $response->getReasonPhrase()),
7172
);
7273
}
7374
}

src/adapter/etl-adapter-logger/tests/Flow/ETL/Adapter/Logger/Tests/Unit/PsrLoggerLoaderTest.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace Flow\ETL\Adapter\Logger\Tests\Unit;
66

7+
use function Flow\ETL\DSL\{int_entry, string_entry};
78
use Flow\ETL\Adapter\Logger\PsrLoggerLoader;
89
use Flow\ETL\{Config, FlowContext, Row, Rows};
910
use PHPUnit\Framework\TestCase;
@@ -20,8 +21,8 @@ public function test_psr_logger_loader() : void
2021

2122
$loader->load(new Rows(
2223
Row::create(
23-
new Row\Entry\IntegerEntry('id', 12345),
24-
Row\Entry\StringEntry::lowercase('name', 'Norbert')
24+
int_entry('id', 12345),
25+
string_entry('name', 'Norbert')->toLowercase()
2526
)
2627
), new FlowContext(Config::default()));
2728

src/adapter/etl-adapter-meilisearch/tests/Flow/ETL/Adapter/Meilisearch/Tests/Integration/MeilisearchPHP/MailiSearchTest.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
namespace Flow\ETL\Adapter\Meilisearch\Tests\Integration\MeilisearchPHP;
66

77
use function Flow\ETL\Adapter\Meilisearch\{from_meilisearch, meilisearch_hits_to_rows, to_meilisearch_bulk_index};
8-
use function Flow\ETL\DSL\from_array;
8+
use function Flow\ETL\DSL\{from_array, string_entry};
99
use Flow\ETL\Adapter\Meilisearch\Tests\Context\MeilisearchContext;
1010
use Flow\ETL\Adapter\Meilisearch\Tests\Double\Spy\HttpClientSpy;
1111
use Flow\ETL\{Flow, Row, Rows};
@@ -70,9 +70,9 @@ public function test_loading_and_extraction_with_limit_and_transformation() : vo
7070
new Rows(
7171
...\array_map(
7272
static fn (int $i) : Row => Row::create(
73-
new Row\Entry\StringEntry('id', \sha1((string) $i)),
73+
string_entry('id', \sha1((string) $i)),
7474
new Row\Entry\IntegerEntry('position', $i),
75-
new Row\Entry\StringEntry('name', 'id_' . $i),
75+
string_entry('name', 'id_' . $i),
7676
new Row\Entry\BooleanEntry('active', false)
7777
),
7878
\range(1, 500)

0 commit comments

Comments
 (0)