Skip to content

Commit 8c2d501

Browse files
committed
added test for NA doubles and custom nullStrings in csv double parser
1 parent 8ff35d9 commit 8c2d501

File tree

6 files changed

+123
-23
lines changed

6 files changed

+123
-23
lines changed

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import kotlinx.datetime.toKotlinLocalDate
99
import kotlinx.datetime.toKotlinLocalDateTime
1010
import org.jetbrains.kotlinx.dataframe.DataColumn
1111
import org.jetbrains.kotlinx.dataframe.DataFrame
12+
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
1213
import org.jetbrains.kotlinx.dataframe.api.cast
1314
import org.jetbrains.kotlinx.dataframe.api.columnOf
1415
import org.jetbrains.kotlinx.dataframe.api.convertTo
@@ -128,6 +129,17 @@ class ParserTests {
128129
)
129130
}
130131

132+
@Test
133+
fun `custom nullStrings`() {
134+
val col by columnOf("1", "2", "null", "3", "NA", "nothing", "4.0", "5.0")
135+
136+
val parsed = col.tryParse(
137+
ParserOptions(nullStrings = setOf("null", "NA", "nothing")),
138+
)
139+
parsed.type() shouldBe typeOf<Double?>()
140+
parsed.toList() shouldBe listOf(1, 2, null, 3, null, null, 4.0, 5.0)
141+
}
142+
131143
@Test
132144
fun `converting String to Double in different locales`() {
133145
val currentLocale = Locale.getDefault()

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/DelimParams.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import org.jetbrains.kotlinx.dataframe.io.ColType
77
import org.jetbrains.kotlinx.dataframe.io.Compression
88
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_COL_TYPE
99
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
10-
import org.jetbrains.kotlinx.dataframe.io.defaultDelimParserOptions
10+
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_PARSER_OPTIONS
1111

1212
/**
1313
* Contains both the default values of csv/tsv parameters and the parameter KDocs.
@@ -78,12 +78,12 @@ internal object DelimParams {
7878
/**
7979
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
8080
* Can configure locale, date format, double parsing, skipping types, etc.
81-
* Default, [defaultDelimParserOptions]:
81+
* Default, [DEFAULT_PARSER_OPTIONS]:
8282
* ```
8383
* ParserOptions(nullStrings = ["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"])
8484
* ```
8585
*/
86-
val PARSER_OPTIONS: ParserOptions = defaultDelimParserOptions
86+
val PARSER_OPTIONS: ParserOptions = DEFAULT_PARSER_OPTIONS
8787

8888
/**
8989
* @param ignoreEmptyLines If `true`, intermediate empty lines will be skipped.

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
3636
import org.jetbrains.kotlinx.dataframe.io.ColType
3737
import org.jetbrains.kotlinx.dataframe.io.Compression
3838
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_COL_TYPE
39-
import org.jetbrains.kotlinx.dataframe.io.defaultNullStrings
39+
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_NULL_STRINGS
4040
import java.io.InputStream
4141
import java.math.BigDecimal
4242
import java.net.URL
@@ -86,7 +86,7 @@ internal fun readDelimImpl(
8686
val csvSpecs = with(CsvSpecs.builder()) {
8787
if (additionalCsvSpecs != null) from(additionalCsvSpecs)
8888
customDoubleParser(FastDoubleParser(parserOptions))
89-
nullValueLiterals(parserOptions.nullStrings ?: defaultNullStrings)
89+
nullValueLiterals(parserOptions.nullStrings ?: DEFAULT_NULL_STRINGS)
9090
headerLegalizer(::legalizeHeader)
9191
numRows(readLines ?: Long.MAX_VALUE)
9292
ignoreEmptyLines(ignoreEmptyLines)

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/util.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ public const val DEFAULT_COL_TYPE: String = ".default"
1111
/**
1212
* Default strings that are considered null.
1313
*/
14-
public val defaultNullStrings: Set<String> =
14+
public val DEFAULT_NULL_STRINGS: Set<String> =
1515
setOf("", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil")
1616

17-
public val defaultDelimParserOptions: ParserOptions =
18-
ParserOptions(nullStrings = defaultNullStrings)
17+
public val DEFAULT_PARSER_OPTIONS: ParserOptions =
18+
ParserOptions(nullStrings = DEFAULT_NULL_STRINGS)

dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -529,24 +529,27 @@ class DelimCsvTsvTests {
529529
}
530530

531531
@Test
532-
fun `parse with wrong locales`() {
533-
@Language("csv")
534-
val csv =
535-
"""
536-
name; price;
537-
a;12,45;
538-
b;-13,35;
539-
c;100.123,35;
540-
d;-204.235,23;
541-
""".trimIndent()
542-
543-
val df = DataFrame.readCsvStr(
544-
text = csv,
545-
delimiter = ';',
532+
fun `NA and custom null string in double column`() {
533+
val df = DataFrame.readCsv(
534+
msleepCsv,
546535
parserOptions = ParserOptions(
547-
locale = Locale.GERMAN,
536+
nullStrings = DEFAULT_NULL_STRINGS + "nothing",
548537
),
549538
)
539+
540+
df.print(columnTypes = true, borders = true, title = true)
541+
542+
df["name"].type() shouldBe typeOf<String>()
543+
df["genus"].type() shouldBe typeOf<String>()
544+
df["vore"].type() shouldBe typeOf<String?>()
545+
df["order"].type() shouldBe typeOf<String>()
546+
df["conservation"].type() shouldBe typeOf<String?>()
547+
df["sleep_total"].type() shouldBe typeOf<Double>()
548+
df["sleep_rem"].type() shouldBe typeOf<Double?>()
549+
df["sleep_cycle"].type() shouldBe typeOf<Double?>()
550+
df["awake"].type() shouldBe typeOf<Double>()
551+
df["brainwt"].type() shouldBe typeOf<Double?>()
552+
df["bodywt"].type() shouldBe typeOf<Double?>()
550553
}
551554

552555
companion object {
@@ -558,6 +561,7 @@ class DelimCsvTsvTests {
558561
private val wineCsv = testCsv("wine")
559562
private val durationCsv = testCsv("duration")
560563
private val withBomCsv = testCsv("with-bom")
564+
private val msleepCsv = testCsv("msleep")
561565
}
562566
}
563567

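msleep.csv (file path not shown on the page; presumably the test resource loaded by testCsv("msleep") in DelimCsvTsvTests above)

Lines changed: 84 additions & 0 deletions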
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
name,genus,vore,order,conservation,sleep_total,sleep_rem,sleep_cycle,awake,brainwt,bodywt
2+
Cheetah,Acinonyx,carni,Carnivora,lc,12.1,nothing,NA,11.9,NA,50
3+
Owl monkey,Aotus,omni,Primates,NA,17,1.8,NA,7,0.0155,0.48
4+
Mountain beaver,Aplodontia,herbi,Rodentia,nt,14.4,2.4,NA,9.6,NA,1.35
5+
Greater short-tailed shrew,Blarina,omni,Soricomorpha,lc,14.9,2.3,0.133333333,9.1,0.00029,0.019
6+
Cow,Bos,herbi,Artiodactyla,domesticated,4,0.7,0.666666667,20,0.423,600
7+
Three-toed sloth,Bradypus,herbi,Pilosa,NA,14.4,2.2,0.766666667,9.6,NA,3.85
8+
Northern fur seal,Callorhinus,carni,Carnivora,vu,8.7,1.4,0.383333333,15.3,NA,20.49
9+
Vesper mouse,Calomys,NA,Rodentia,NA,7,NA,NA,17,NA,0.045
10+
Dog,Canis,carni,Carnivora,domesticated,10.1,2.9,0.333333333,13.9,0.07,14
11+
Roe deer,Capreolus,herbi,Artiodactyla,lc,3,NA,NA,21,0.0982,14.8
12+
Goat,Capri,herbi,Artiodactyla,lc,5.3,0.6,NA,18.7,0.115,33.5
13+
Guinea pig,Cavis,herbi,Rodentia,domesticated,9.4,0.8,0.216666667,14.6,0.0055,0.728
14+
Grivet,Cercopithecus,omni,Primates,lc,10,0.7,NA,14,NA,4.75
15+
Chinchilla,Chinchilla,herbi,Rodentia,domesticated,12.5,1.5,0.116666667,11.5,0.0064,0.42
16+
Star-nosed mole,Condylura,omni,Soricomorpha,lc,10.3,2.2,NA,13.7,0.001,0.06
17+
African giant pouched rat,Cricetomys,omni,Rodentia,NA,8.3,2,NA,15.7,0.0066,1
18+
Lesser short-tailed shrew,Cryptotis,omni,Soricomorpha,lc,9.1,1.4,0.15,14.9,0.00014,0.005
19+
Long-nosed armadillo,Dasypus,carni,Cingulata,lc,17.4,3.1,0.383333333,6.6,0.0108,3.5
20+
Tree hyrax,Dendrohyrax,herbi,Hyracoidea,lc,5.3,0.5,NA,18.7,0.0123,2.95
21+
North American Opossum,Didelphis,omni,Didelphimorphia,lc,18,4.9,0.333333333,6,0.0063,1.7
22+
Asian elephant,Elephas,herbi,Proboscidea,en,3.9,NA,NA,20.1,4.603,2547
23+
Big brown bat,Eptesicus,insecti,Chiroptera,lc,19.7,3.9,0.116666667,4.3,3e-04,0.023
24+
Horse,Equus,herbi,Perissodactyla,domesticated,2.9,0.6,1,21.1,0.655,521
25+
Donkey,Equus,herbi,Perissodactyla,domesticated,3.1,0.4,NA,20.9,0.419,187
26+
European hedgehog,Erinaceus,omni,Erinaceomorpha,lc,10.1,3.5,0.283333333,13.9,0.0035,0.77
27+
Patas monkey,Erythrocebus,omni,Primates,lc,10.9,1.1,NA,13.1,0.115,10
28+
Western american chipmunk,Eutamias,herbi,Rodentia,NA,14.9,NA,NA,9.1,NA,0.071
29+
Domestic cat,Felis,carni,Carnivora,domesticated,12.5,3.2,0.416666667,11.5,0.0256,3.3
30+
Galago,Galago,omni,Primates,NA,9.8,1.1,0.55,14.2,0.005,0.2
31+
Giraffe,Giraffa,herbi,Artiodactyla,cd,1.9,0.4,NA,22.1,NA,899.995
32+
Pilot whale,Globicephalus,carni,Cetacea,cd,2.7,0.1,NA,21.35,NA,800
33+
Gray seal,Haliochoerus,carni,Carnivora,lc,6.2,1.5,NA,17.8,0.325,85
34+
Gray hyrax,Heterohyrax,herbi,Hyracoidea,lc,6.3,0.6,NA,17.7,0.01227,2.625
35+
Human,Homo,omni,Primates,NA,8,1.9,1.5,16,1.32,62
36+
Mongoose lemur,Lemur,herbi,Primates,vu,9.5,0.9,NA,14.5,NA,1.67
37+
African elephant,Loxodonta,herbi,Proboscidea,vu,3.3,NA,NA,20.7,5.712,6654
38+
Thick-tailed opposum,Lutreolina,carni,Didelphimorphia,lc,19.4,6.6,NA,4.6,NA,0.37
39+
Macaque,Macaca,omni,Primates,NA,10.1,1.2,0.75,13.9,0.179,6.8
40+
Mongolian gerbil,Meriones,herbi,Rodentia,lc,14.2,1.9,NA,9.8,NA,0.053
41+
Golden hamster,Mesocricetus,herbi,Rodentia,en,14.3,3.1,0.2,9.7,0.001,0.12
42+
Vole ,Microtus,herbi,Rodentia,NA,12.8,NA,NA,11.2,NA,0.035
43+
House mouse,Mus,herbi,Rodentia,nt,12.5,1.4,0.183333333,11.5,4e-04,0.022
44+
Little brown bat,Myotis,insecti,Chiroptera,NA,19.9,2,0.2,4.1,0.00025,0.01
45+
Round-tailed muskrat,Neofiber,herbi,Rodentia,nt,14.6,NA,NA,9.4,NA,0.266
46+
Slow loris,Nyctibeus,carni,Primates,NA,11,NA,NA,13,0.0125,1.4
47+
Degu,Octodon,herbi,Rodentia,lc,7.7,0.9,NA,16.3,NA,0.21
48+
Northern grasshopper mouse,Onychomys,carni,Rodentia,lc,14.5,NA,NA,9.5,NA,0.028
49+
Rabbit,Oryctolagus,herbi,Lagomorpha,domesticated,8.4,0.9,0.416666667,15.6,0.0121,2.5
50+
Sheep,Ovis,herbi,Artiodactyla,domesticated,3.8,0.6,NA,20.2,0.175,55.5
51+
Chimpanzee,Pan,omni,Primates,NA,9.7,1.4,1.416666667,14.3,0.44,52.2
52+
Tiger,Panthera,carni,Carnivora,en,15.8,NA,NA,8.2,NA,162.564
53+
Jaguar,Panthera,carni,Carnivora,nt,10.4,NA,NA,13.6,0.157,100
54+
Lion,Panthera,carni,Carnivora,vu,13.5,NA,NA,10.5,NA,161.499
55+
Baboon,Papio,omni,Primates,NA,9.4,1,0.666666667,14.6,0.18,25.235
56+
Desert hedgehog,Paraechinus,NA,Erinaceomorpha,lc,10.3,2.7,NA,13.7,0.0024,0.55
57+
Potto,Perodicticus,omni,Primates,lc,11,NA,NA,13,NA,1.1
58+
Deer mouse,Peromyscus,NA,Rodentia,NA,11.5,NA,NA,12.5,NA,0.021
59+
Phalanger,Phalanger,NA,Diprotodontia,NA,13.7,1.8,NA,10.3,0.0114,1.62
60+
Caspian seal,Phoca,carni,Carnivora,vu,3.5,0.4,NA,20.5,NA,86
61+
Common porpoise,Phocoena,carni,Cetacea,vu,5.6,NA,NA,18.45,NA,53.18
62+
Potoroo,Potorous,herbi,Diprotodontia,NA,11.1,1.5,NA,12.9,NA,1.1
63+
Giant armadillo,Priodontes,insecti,Cingulata,en,18.1,6.1,NA,5.9,0.081,60
64+
Rock hyrax,Procavia,NA,Hyracoidea,lc,5.4,0.5,NA,18.6,0.021,3.6
65+
Laboratory rat,Rattus,herbi,Rodentia,lc,13,2.4,0.183333333,11,0.0019,0.32
66+
African striped mouse,Rhabdomys,omni,Rodentia,NA,8.7,NA,NA,15.3,NA,0.044
67+
Squirrel monkey,Saimiri,omni,Primates,NA,9.6,1.4,NA,14.4,0.02,0.743
68+
Eastern american mole,Scalopus,insecti,Soricomorpha,lc,8.4,2.1,0.166666667,15.6,0.0012,0.075
69+
Cotton rat,Sigmodon,herbi,Rodentia,NA,11.3,1.1,0.15,12.7,0.00118,0.148
70+
Mole rat,Spalax,NA,Rodentia,NA,10.6,2.4,NA,13.4,0.003,0.122
71+
Arctic ground squirrel,Spermophilus,herbi,Rodentia,lc,16.6,NA,NA,7.4,0.0057,0.92
72+
Thirteen-lined ground squirrel,Spermophilus,herbi,Rodentia,lc,13.8,3.4,0.216666667,10.2,0.004,0.101
73+
Golden-mantled ground squirrel,Spermophilus,herbi,Rodentia,lc,15.9,3,NA,8.1,NA,0.205
74+
Musk shrew,Suncus,NA,Soricomorpha,NA,12.8,2,0.183333333,11.2,0.00033,0.048
75+
Pig,Sus,omni,Artiodactyla,domesticated,9.1,2.4,0.5,14.9,0.18,86.25
76+
Short-nosed echidna,Tachyglossus,insecti,Monotremata,NA,8.6,NA,NA,15.4,0.025,4.5
77+
Eastern american chipmunk,Tamias,herbi,Rodentia,NA,15.8,NA,NA,8.2,NA,0.112
78+
Brazilian tapir,Tapirus,herbi,Perissodactyla,vu,4.4,1,0.9,19.6,0.169,207.501
79+
Tenrec,Tenrec,omni,Afrosoricida,NA,15.6,2.3,NA,8.4,0.0026,0.9
80+
Tree shrew,Tupaia,omni,Scandentia,NA,8.9,2.6,0.233333333,15.1,0.0025,0.104
81+
Bottle-nosed dolphin,Tursiops,carni,Cetacea,NA,5.2,NA,NA,18.8,NA,173.33
82+
Genet,Genetta,carni,Carnivora,NA,6.3,1.3,NA,17.7,0.0175,2
83+
Arctic fox,Vulpes,carni,Carnivora,NA,12.5,NA,NA,11.5,0.0445,3.38
84+
Red fox,Vulpes,carni,Carnivora,NA,9.8,2.4,0.35,14.2,0.0504,4.23

0 commit comments

Comments
 (0)