Skip to content

Commit 50f4f33

Browse files
committed
working on review feedback
1 parent f751b0d commit 50f4f33

File tree

2 files changed

+52
-1
lines changed

2 files changed

+52
-1
lines changed

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,23 @@ private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Lo
249249
/**
250250
* Sets the correct parsers for the csv, based on [colTypes] and [ParserOptions.skipTypes].
251251
* If [ColType.DEFAULT] is present, it sets the default parser.
252+
*
253+
* Logic overview:
254+
*
255+
* - if no [colTypes] are given
256+
* - let deephaven use all its [default parsers][Parsers.DEFAULT]
257+
* - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
258+
* - if [colTypes] are supplied
259+
* - if [ColType.DEFAULT] is among the values
260+
* - set the parser for each supplied column+colType
261+
* - let deephaven use _only_ the parser given as the [ColType.DEFAULT] type
262+
* - if [ColType.DEFAULT] is not among the values
263+
* - set the parser for each supplied column+colType
264+
* - let deephaven use all its [default parsers][Parsers.DEFAULT]
265+
* - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
266+
*
267+
* Note that `skipTypes` will never skip a type explicitly set by `colTypes`.
268+
* This is intended.
252269
*/
253270
private fun CsvSpecs.Builder.parsers(
254271
parserOptions: ParserOptions,

dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,39 @@ class DelimCsvTsvTests {
433433
shouldThrow<IllegalArgumentException> { DataFrame.readCsv("NON EXISTENT FILE") }
434434
}
435435

436+
@Test
437+
fun `cannot auto-parse specific date string`() {
438+
@Language("csv")
439+
val frenchCsv =
440+
"""
441+
name; price; date;
442+
a;12,45; 05/06/2021;
443+
b;-13,35;14/07/2025;
444+
c;100 123,35;;
445+
d;-204 235,23;;
446+
e;NaN;;
447+
f;null;;
448+
""".trimIndent()
449+
450+
val dfDeephaven = DataFrame.readCsvStr(
451+
text = frenchCsv,
452+
delimiter = ';',
453+
)
454+
455+
// could not parse, remains String
456+
dfDeephaven["date"].type() shouldBe typeOf<String?>()
457+
458+
val dfDataFrame = DataFrame.readCsvStr(
459+
text = frenchCsv,
460+
delimiter = ';',
461+
// setting any locale skips deephaven's date parsing
462+
parserOptions = DEFAULT_PARSER_OPTIONS.copy(locale = Locale.ROOT),
463+
)
464+
465+
// could not parse, remains String
466+
dfDataFrame["date"].type() shouldBe typeOf<String?>()
467+
}
468+
436469
@Test
437470
fun `parse with other locales`() {
438471
@Language("csv")
@@ -626,12 +659,13 @@ class DelimCsvTsvTests {
626659
fun `skipping types`() {
627660
val irisDataset = DataFrame.readCsv(
628661
irisDataset,
662+
colTypes = mapOf("sepal.length" to ColType.Double),
629663
parserOptions = DEFAULT_PARSER_OPTIONS.copy(
630664
skipTypes = setOf(typeOf<Double>()),
631665
),
632666
)
633667

634-
irisDataset["sepal.length"].type() shouldBe typeOf<BigDecimal>()
668+
irisDataset["sepal.length"].type() shouldBe typeOf<Double>()
635669
irisDataset["sepal.width"].type() shouldBe typeOf<BigDecimal>()
636670
irisDataset["petal.length"].type() shouldBe typeOf<BigDecimal>()
637671
irisDataset["petal.width"].type() shouldBe typeOf<BigDecimal>()

0 commit comments

Comments
 (0)