@@ -24,10 +24,9 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
24
24
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
25
25
import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
26
26
import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
27
- import org.jetbrains.kotlinx.dataframe.exceptions.CellConversionException
28
- import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
29
27
import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
30
28
import org.jetbrains.kotlinx.dataframe.impl.api.convertRowColumnImpl
29
+ import org.jetbrains.kotlinx.dataframe.impl.api.convertToDoubleImpl
31
30
import org.jetbrains.kotlinx.dataframe.impl.api.convertToTypeImpl
32
31
import org.jetbrains.kotlinx.dataframe.impl.api.defaultTimeZone
33
32
import org.jetbrains.kotlinx.dataframe.impl.api.toLocalDate
@@ -36,14 +35,12 @@ import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime
36
35
import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
37
36
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
38
37
import org.jetbrains.kotlinx.dataframe.io.toDataFrame
39
- import org.jetbrains.kotlinx.dataframe.path
40
38
import java.math.BigDecimal
41
39
import java.net.URL
42
40
import java.util.Locale
43
41
import kotlin.reflect.KProperty
44
42
import kotlin.reflect.KType
45
43
import kotlin.reflect.full.isSubtypeOf
46
- import kotlin.reflect.full.withNullability
47
44
import kotlin.reflect.typeOf
48
45
49
46
@Interpretable(" Convert0" )
@@ -129,15 +126,29 @@ public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
129
126
130
127
public inline fun <reified C > AnyCol.convertTo (): DataColumn <C > = convertTo(typeOf<C >()) as DataColumn <C >
131
128
132
- public fun AnyCol.convertTo (newType : KType ): AnyCol {
133
- val isTypesAreCorrect = this .type().withNullability(true ).isSubtypeOf(typeOf<String ?>()) &&
134
- newType.withNullability(true ) == typeOf<Double ?>()
129
+ @Suppress(" UNCHECKED_CAST" )
130
+ public fun AnyCol.convertTo (newType : KType ): AnyCol =
131
+ when {
132
+ type().isSubtypeOf(typeOf<String ?>()) ->
133
+ (this as DataColumn <String ?>).convertTo(newType)
135
134
136
- if (isTypesAreCorrect) {
137
- return (this as DataColumn <String ?>).convertToDouble().setNullable(newType.isMarkedNullable)
135
+ else -> convertToTypeImpl(newType, null )
136
+ }
137
+
138
+ public inline fun <reified C > DataColumn<String?>.convertTo (parserOptions : ParserOptions ? = null): DataColumn <C > =
139
+ convertTo(typeOf<C >(), parserOptions) as DataColumn <C >
140
+
141
+ public fun DataColumn<String?>.convertTo (newType : KType , parserOptions : ParserOptions ? = null): AnyCol =
142
+ when {
143
+ newType.isSubtypeOf(typeOf<Double ?>()) ->
144
+ convertToDoubleImpl(
145
+ locale = parserOptions?.locale,
146
+ nullStrings = parserOptions?.nullStrings,
147
+ useFastDoubleParser = parserOptions?.useFastDoubleParser,
148
+ ).setNullable(newType.isMarkedNullable)
149
+
150
+ else -> convertToTypeImpl(newType, parserOptions)
138
151
}
139
- return convertToTypeImpl(newType)
140
- }
141
152
142
153
@JvmName(" convertToLocalDateTimeFromT" )
143
154
public fun <T : Any > DataColumn<T>.convertToLocalDateTime (): DataColumn <LocalDateTime > = convertTo()
@@ -185,78 +196,72 @@ public fun <T : Any> DataColumn<T>.convertToDouble(): DataColumn<Double> = conve
185
196
public fun <T : Any > DataColumn<T?>.convertToDouble (): DataColumn <Double ?> = convertTo()
186
197
187
198
/** Parses a String column to Double considering locale (number format).
188
- * If [locale] parameter is defined, it's number format is used for parsing.
189
- * If [locale] parameter is null, the current system locale is used.
190
- * If the column cannot be parsed, then the POSIX format is used. */
199
+ *
200
+ * If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
201
+ *
202
+ * @param locale If defined, its number format is used for parsing.
203
+ * The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
204
+ * If the column cannot be parsed, the POSIX format is used. */
191
205
@JvmName(" convertToDoubleFromString" )
192
206
public fun DataColumn<String>.convertToDouble (locale : Locale ? = null): DataColumn <Double > =
193
- convertToDouble(locale = locale, useFastDoubleParser = false )
207
+ convertToDouble(locale = locale, nullStrings = null , useFastDoubleParser = null )
194
208
195
209
/**
196
210
* Parses a String column to Double considering locale (number format).
197
- * If [locale] parameter is defined, it's number format is used for parsing.
198
- * If [locale] parameter is null, the current system locale is used.
199
- * If the column cannot be parsed, then the POSIX format is used.
200
- * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
211
+ *
212
+ * If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
213
+ *
214
+ * @param locale If defined, its number format is used for parsing.
215
+ * The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
216
+ * If the column cannot be parsed, the POSIX format is used.
217
+ * @param nullStrings a set of strings that should be treated as `null` values.
218
+ * The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
219
+ * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
220
+ * The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
201
221
*/
202
222
@JvmName(" convertToDoubleFromString" )
203
223
public fun DataColumn<String>.convertToDouble (
204
224
locale : Locale ? = null,
205
- useFastDoubleParser : Boolean ,
206
- ): DataColumn <Double > = this .castToNullable().convertToDouble(locale, useFastDoubleParser).castToNotNullable()
225
+ nullStrings : Set <String >? ,
226
+ useFastDoubleParser : Boolean? ,
227
+ ): DataColumn <Double > =
228
+ this .castToNullable().convertToDouble(locale, nullStrings, useFastDoubleParser).castToNotNullable()
207
229
208
230
/** Parses a String column to Double considering locale (number format).
209
- * If [locale] parameter is defined, it's number format is used for parsing.
210
- * If [locale] parameter is null, the current system locale is used.
211
- * If the column cannot be parsed, then the POSIX format is used. */
231
+ *
232
+ * If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
233
+ *
234
+ * @param locale If defined, its number format is used for parsing.
235
+ * The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
236
+ * If the column cannot be parsed, the POSIX format is used. */
212
237
@JvmName(" convertToDoubleFromStringNullable" )
213
238
public fun DataColumn<String?>.convertToDouble (locale : Locale ? = null): DataColumn <Double ?> =
214
- convertToDouble(locale = locale, useFastDoubleParser = false )
239
+ convertToDouble(locale = locale, nullStrings = null , useFastDoubleParser = null )
215
240
216
241
/**
217
242
* Parses a String column to Double considering locale (number format).
218
- * If [locale] parameter is defined, it's number format is used for parsing.
219
- * If [locale] parameter is null, the current system locale is used.
220
- * If the column cannot be parsed, then the POSIX format is used.
221
- * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
243
+ *
244
+ * If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
245
+ *
246
+ * @param locale If defined, its number format is used for parsing.
247
+ * The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
248
+ * If the column cannot be parsed, the POSIX format is used.
249
+ * @param nullStrings a set of strings that should be treated as `null` values.
250
+ * The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
251
+ * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
252
+ * The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
222
253
*/
223
254
@JvmName(" convertToDoubleFromStringNullable" )
224
255
public fun DataColumn<String?>.convertToDouble (
225
256
locale : Locale ? = null,
226
- useFastDoubleParser : Boolean ,
227
- ): DataColumn <Double ?> {
228
- fun applyParser (parser : (String ) -> Double? ): DataColumn <Double ?> {
229
- var currentRow = 0
230
- try {
231
- return mapIndexed { row, value ->
232
- currentRow = row
233
- value?.let {
234
- parser(value.trim()) ? : throw TypeConversionException (
235
- value = value,
236
- from = typeOf<String >(),
237
- to = typeOf<Double >(),
238
- column = path,
239
- )
240
- }
241
- }
242
- } catch (e: TypeConversionException ) {
243
- throw CellConversionException (e.value, e.from, e.to, path, currentRow, e)
244
- }
245
- }
246
-
247
- return if (locale != null ) {
248
- val explicitParser = Parsers .getDoubleParser(locale, useFastDoubleParser)
249
- applyParser(explicitParser)
250
- } else {
251
- try {
252
- val defaultParser = Parsers .getDoubleParser(useFastDoubleParser = useFastDoubleParser)
253
- applyParser(defaultParser)
254
- } catch (e: TypeConversionException ) {
255
- val posixParser = Parsers .getDoubleParser(Locale .forLanguageTag(" C.UTF-8" ), useFastDoubleParser)
256
- applyParser(posixParser)
257
- }
258
- }
259
- }
257
+ nullStrings : Set <String >? ,
258
+ useFastDoubleParser : Boolean? ,
259
+ ): DataColumn <Double ?> =
260
+ convertToDoubleImpl(
261
+ locale = locale,
262
+ nullStrings = nullStrings,
263
+ useFastDoubleParser = useFastDoubleParser,
264
+ )
260
265
261
266
@JvmName(" convertToFloatFromT" )
262
267
public fun <T : Any > DataColumn<T>.convertToFloat (): DataColumn <Float > = convertTo()
0 commit comments