@@ -24,10 +24,9 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
2424import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
2525import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
2626import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
27- import org.jetbrains.kotlinx.dataframe.exceptions.CellConversionException
28- import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
2927import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
3028import org.jetbrains.kotlinx.dataframe.impl.api.convertRowColumnImpl
29+ import org.jetbrains.kotlinx.dataframe.impl.api.convertToDoubleImpl
3130import org.jetbrains.kotlinx.dataframe.impl.api.convertToTypeImpl
3231import org.jetbrains.kotlinx.dataframe.impl.api.defaultTimeZone
3332import org.jetbrains.kotlinx.dataframe.impl.api.toLocalDate
@@ -36,14 +35,12 @@ import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime
3635import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
3736import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
3837import org.jetbrains.kotlinx.dataframe.io.toDataFrame
39- import org.jetbrains.kotlinx.dataframe.path
4038import java.math.BigDecimal
4139import java.net.URL
4240import java.util.Locale
4341import kotlin.reflect.KProperty
4442import kotlin.reflect.KType
4543import kotlin.reflect.full.isSubtypeOf
46- import kotlin.reflect.full.withNullability
4744import kotlin.reflect.typeOf
4845
4946@Interpretable(" Convert0" )
@@ -129,15 +126,29 @@ public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
129126
130127public inline fun <reified C > AnyCol.convertTo (): DataColumn <C > = convertTo(typeOf<C >()) as DataColumn <C >
131128
132- public fun AnyCol.convertTo (newType : KType ): AnyCol {
133- val isTypesAreCorrect = this .type().withNullability(true ).isSubtypeOf(typeOf<String ?>()) &&
134- newType.withNullability(true ) == typeOf<Double ?>()
129+ @Suppress(" UNCHECKED_CAST" )
130+ public fun AnyCol.convertTo (newType : KType ): AnyCol =
131+ when {
132+ type().isSubtypeOf(typeOf<String ?>()) ->
133+ (this as DataColumn <String ?>).convertTo(newType)
135134
136- if (isTypesAreCorrect) {
137- return (this as DataColumn <String ?>).convertToDouble().setNullable(newType.isMarkedNullable)
135+ else -> convertToTypeImpl(newType, null )
136+ }
137+
138+ public inline fun <reified C > DataColumn<String?>.convertTo (parserOptions : ParserOptions ? = null): DataColumn <C > =
139+ convertTo(typeOf<C >(), parserOptions) as DataColumn <C >
140+
141+ public fun DataColumn<String?>.convertTo (newType : KType , parserOptions : ParserOptions ? = null): AnyCol =
142+ when {
143+ newType.isSubtypeOf(typeOf<Double ?>()) ->
144+ convertToDoubleImpl(
145+ locale = parserOptions?.locale,
146+ nullStrings = parserOptions?.nullStrings,
147+ useFastDoubleParser = parserOptions?.useFastDoubleParser,
148+ ).setNullable(newType.isMarkedNullable)
149+
150+ else -> convertToTypeImpl(newType, parserOptions)
138151 }
139- return convertToTypeImpl(newType)
140- }
141152
142153@JvmName(" convertToLocalDateTimeFromT" )
143154public fun <T : Any > DataColumn<T>.convertToLocalDateTime (): DataColumn <LocalDateTime > = convertTo()
@@ -185,78 +196,72 @@ public fun <T : Any> DataColumn<T>.convertToDouble(): DataColumn<Double> = conve
185196public fun <T : Any > DataColumn<T?>.convertToDouble (): DataColumn <Double ?> = convertTo()
186197
187198/* * Parses a String column to Double considering locale (number format).
188- * If [locale] parameter is defined, it's number format is used for parsing.
189- * If [locale] parameter is null, the current system locale is used.
190- * If the column cannot be parsed, then the POSIX format is used. */
199+ *
200+ * If [locale] is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
201+ *
202+ * @param locale If defined, its number format is used for parsing.
203+ * The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
204+ * If the column cannot be parsed, the POSIX format is used. */
191205@JvmName(" convertToDoubleFromString" )
192206public fun DataColumn<String>.convertToDouble (locale : Locale ? = null): DataColumn <Double > =
193- convertToDouble(locale = locale, useFastDoubleParser = false )
207+ convertToDouble(locale = locale, nullStrings = null , useFastDoubleParser = null )
194208
195209/* *
196210 * Parses a String column to Double considering locale (number format).
197- * If [locale] parameter is defined, it's number format is used for parsing.
198- * If [locale] parameter is null, the current system locale is used.
199- * If the column cannot be parsed, then the POSIX format is used.
200- * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
211+ *
212+ * If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
213+ *
214+ * @param locale If defined, its number format is used for parsing.
215+ * The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
216+ * If the column cannot be parsed, the POSIX format is used.
217+ * @param nullStrings a set of strings that should be treated as `null` values.
218+ * The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
219+ * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
220+ * The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
201221 */
202222@JvmName(" convertToDoubleFromString" )
203223public fun DataColumn<String>.convertToDouble (
204224 locale : Locale ? = null,
205- useFastDoubleParser : Boolean ,
206- ): DataColumn <Double > = this .castToNullable().convertToDouble(locale, useFastDoubleParser).castToNotNullable()
225+ nullStrings : Set <String >? ,
226+ useFastDoubleParser : Boolean? ,
227+ ): DataColumn <Double > =
228+ this .castToNullable().convertToDouble(locale, nullStrings, useFastDoubleParser).castToNotNullable()
207229
208230/* * Parses a String column to Double considering locale (number format).
209- * If [locale] parameter is defined, it's number format is used for parsing.
210- * If [locale] parameter is null, the current system locale is used.
211- * If the column cannot be parsed, then the POSIX format is used. */
231+ *
232+ * If [locale] is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
233+ *
234+ * @param locale If defined, its number format is used for parsing.
235+ * The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
236+ * If the column cannot be parsed, the POSIX format is used. */
212237@JvmName(" convertToDoubleFromStringNullable" )
213238public fun DataColumn<String?>.convertToDouble (locale : Locale ? = null): DataColumn <Double ?> =
214- convertToDouble(locale = locale, useFastDoubleParser = false )
239+ convertToDouble(locale = locale, nullStrings = null , useFastDoubleParser = null )
215240
216241/* *
217242 * Parses a String column to Double considering locale (number format).
218- * If [locale] parameter is defined, it's number format is used for parsing.
219- * If [locale] parameter is null, the current system locale is used.
220- * If the column cannot be parsed, then the POSIX format is used.
221- * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
243+ *
244+ * If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
245+ *
246+ * @param locale If defined, its number format is used for parsing.
247+ * The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
248+ * If the column cannot be parsed, the POSIX format is used.
249+ * @param nullStrings a set of strings that should be treated as `null` values.
250+ * The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
251+ * @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
252+ * The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
222253 */
223254@JvmName(" convertToDoubleFromStringNullable" )
224255public fun DataColumn<String?>.convertToDouble (
225256 locale : Locale ? = null,
226- useFastDoubleParser : Boolean ,
227- ): DataColumn <Double ?> {
228- fun applyParser (parser : (String ) -> Double? ): DataColumn <Double ?> {
229- var currentRow = 0
230- try {
231- return mapIndexed { row, value ->
232- currentRow = row
233- value?.let {
234- parser(value.trim()) ? : throw TypeConversionException (
235- value = value,
236- from = typeOf<String >(),
237- to = typeOf<Double >(),
238- column = path,
239- )
240- }
241- }
242- } catch (e: TypeConversionException ) {
243- throw CellConversionException (e.value, e.from, e.to, path, currentRow, e)
244- }
245- }
246-
247- return if (locale != null ) {
248- val explicitParser = Parsers .getDoubleParser(locale, useFastDoubleParser)
249- applyParser(explicitParser)
250- } else {
251- try {
252- val defaultParser = Parsers .getDoubleParser(useFastDoubleParser = useFastDoubleParser)
253- applyParser(defaultParser)
254- } catch (e: TypeConversionException ) {
255- val posixParser = Parsers .getDoubleParser(Locale .forLanguageTag(" C.UTF-8" ), useFastDoubleParser)
256- applyParser(posixParser)
257- }
258- }
259- }
257+ nullStrings : Set <String >? ,
258+ useFastDoubleParser : Boolean? ,
259+ ): DataColumn <Double ?> =
260+ convertToDoubleImpl(
261+ locale = locale,
262+ nullStrings = nullStrings,
263+ useFastDoubleParser = useFastDoubleParser,
264+ )
260265
261266@JvmName(" convertToFloatFromT" )
262267public fun <T : Any > DataColumn<T>.convertToFloat (): DataColumn <Float > = convertTo()
0 commit comments