Skip to content

Commit 114c570

Browse files
Automated commit of generated code
1 parent 2f3f364 commit 114c570

File tree

16 files changed

+546
-239
lines changed

16 files changed

+546
-239
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ public enum class DataSchemaVisibility {
4747
EXPLICIT_PUBLIC,
4848
}
4949

50+
// TODO add more options
5051
public annotation class CsvOptions(public val delimiter: Char)
5152

5253
/**

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

+68-63
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,9 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
2424
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
2525
import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
2626
import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
27-
import org.jetbrains.kotlinx.dataframe.exceptions.CellConversionException
28-
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
2927
import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
3028
import org.jetbrains.kotlinx.dataframe.impl.api.convertRowColumnImpl
29+
import org.jetbrains.kotlinx.dataframe.impl.api.convertToDoubleImpl
3130
import org.jetbrains.kotlinx.dataframe.impl.api.convertToTypeImpl
3231
import org.jetbrains.kotlinx.dataframe.impl.api.defaultTimeZone
3332
import org.jetbrains.kotlinx.dataframe.impl.api.toLocalDate
@@ -36,14 +35,12 @@ import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime
3635
import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
3736
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
3837
import org.jetbrains.kotlinx.dataframe.io.toDataFrame
39-
import org.jetbrains.kotlinx.dataframe.path
4038
import java.math.BigDecimal
4139
import java.net.URL
4240
import java.util.Locale
4341
import kotlin.reflect.KProperty
4442
import kotlin.reflect.KType
4543
import kotlin.reflect.full.isSubtypeOf
46-
import kotlin.reflect.full.withNullability
4744
import kotlin.reflect.typeOf
4845

4946
@Interpretable("Convert0")
@@ -129,15 +126,29 @@ public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
129126

130127
public inline fun <reified C> AnyCol.convertTo(): DataColumn<C> = convertTo(typeOf<C>()) as DataColumn<C>
131128

132-
public fun AnyCol.convertTo(newType: KType): AnyCol {
133-
val isTypesAreCorrect = this.type().withNullability(true).isSubtypeOf(typeOf<String?>()) &&
134-
newType.withNullability(true) == typeOf<Double?>()
129+
@Suppress("UNCHECKED_CAST")
130+
public fun AnyCol.convertTo(newType: KType): AnyCol =
131+
when {
132+
type().isSubtypeOf(typeOf<String?>()) ->
133+
(this as DataColumn<String?>).convertTo(newType)
135134

136-
if (isTypesAreCorrect) {
137-
return (this as DataColumn<String?>).convertToDouble().setNullable(newType.isMarkedNullable)
135+
else -> convertToTypeImpl(newType, null)
136+
}
137+
138+
public inline fun <reified C> DataColumn<String?>.convertTo(parserOptions: ParserOptions? = null): DataColumn<C> =
139+
convertTo(typeOf<C>(), parserOptions) as DataColumn<C>
140+
141+
public fun DataColumn<String?>.convertTo(newType: KType, parserOptions: ParserOptions? = null): AnyCol =
142+
when {
143+
newType.isSubtypeOf(typeOf<Double?>()) ->
144+
convertToDoubleImpl(
145+
locale = parserOptions?.locale,
146+
nullStrings = parserOptions?.nullStrings,
147+
useFastDoubleParser = parserOptions?.useFastDoubleParser,
148+
).setNullable(newType.isMarkedNullable)
149+
150+
else -> convertToTypeImpl(newType, parserOptions)
138151
}
139-
return convertToTypeImpl(newType)
140-
}
141152

142153
@JvmName("convertToLocalDateTimeFromT")
143154
public fun <T : Any> DataColumn<T>.convertToLocalDateTime(): DataColumn<LocalDateTime> = convertTo()
@@ -185,78 +196,72 @@ public fun <T : Any> DataColumn<T>.convertToDouble(): DataColumn<Double> = conve
185196
public fun <T : Any> DataColumn<T?>.convertToDouble(): DataColumn<Double?> = convertTo()
186197

187198
/** Parses a String column to Double considering locale (number format).
188-
* If [locale] parameter is defined, it's number format is used for parsing.
189-
* If [locale] parameter is null, the current system locale is used.
190-
* If the column cannot be parsed, then the POSIX format is used. */
199+
*
200+
* If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
201+
*
202+
* @param locale If defined, its number format is used for parsing.
203+
* The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
204+
* If the column cannot be parsed, the POSIX format is used. */
191205
@JvmName("convertToDoubleFromString")
192206
public fun DataColumn<String>.convertToDouble(locale: Locale? = null): DataColumn<Double> =
193-
convertToDouble(locale = locale, useFastDoubleParser = false)
207+
convertToDouble(locale = locale, nullStrings = null, useFastDoubleParser = null)
194208

195209
/**
196210
* Parses a String column to Double considering locale (number format).
197-
* If [locale] parameter is defined, it's number format is used for parsing.
198-
* If [locale] parameter is null, the current system locale is used.
199-
* If the column cannot be parsed, then the POSIX format is used.
200-
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
211+
*
212+
* If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
213+
*
214+
* @param locale If defined, its number format is used for parsing.
215+
* The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
216+
* If the column cannot be parsed, the POSIX format is used.
217+
* @param nullStrings a set of strings that should be treated as `null` values.
218+
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
219+
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
220+
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
201221
*/
202222
@JvmName("convertToDoubleFromString")
203223
public fun DataColumn<String>.convertToDouble(
204224
locale: Locale? = null,
205-
useFastDoubleParser: Boolean,
206-
): DataColumn<Double> = this.castToNullable().convertToDouble(locale, useFastDoubleParser).castToNotNullable()
225+
nullStrings: Set<String>?,
226+
useFastDoubleParser: Boolean?,
227+
): DataColumn<Double> =
228+
this.castToNullable().convertToDouble(locale, nullStrings, useFastDoubleParser).castToNotNullable()
207229

208230
/** Parses a String column to Double considering locale (number format).
209-
* If [locale] parameter is defined, it's number format is used for parsing.
210-
* If [locale] parameter is null, the current system locale is used.
211-
* If the column cannot be parsed, then the POSIX format is used. */
231+
*
232+
* If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
233+
*
234+
* @param locale If defined, its number format is used for parsing.
235+
* The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
236+
* If the column cannot be parsed, the POSIX format is used. */
212237
@JvmName("convertToDoubleFromStringNullable")
213238
public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColumn<Double?> =
214-
convertToDouble(locale = locale, useFastDoubleParser = false)
239+
convertToDouble(locale = locale, nullStrings = null, useFastDoubleParser = null)
215240

216241
/**
217242
* Parses a String column to Double considering locale (number format).
218-
* If [locale] parameter is defined, it's number format is used for parsing.
219-
* If [locale] parameter is null, the current system locale is used.
220-
* If the column cannot be parsed, then the POSIX format is used.
221-
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
243+
*
244+
* If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
245+
*
246+
* @param locale If defined, its number format is used for parsing.
247+
* The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
248+
* If the column cannot be parsed, the POSIX format is used.
249+
* @param nullStrings a set of strings that should be treated as `null` values.
250+
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
251+
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
252+
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
222253
*/
223254
@JvmName("convertToDoubleFromStringNullable")
224255
public fun DataColumn<String?>.convertToDouble(
225256
locale: Locale? = null,
226-
useFastDoubleParser: Boolean,
227-
): DataColumn<Double?> {
228-
fun applyParser(parser: (String) -> Double?): DataColumn<Double?> {
229-
var currentRow = 0
230-
try {
231-
return mapIndexed { row, value ->
232-
currentRow = row
233-
value?.let {
234-
parser(value.trim()) ?: throw TypeConversionException(
235-
value = value,
236-
from = typeOf<String>(),
237-
to = typeOf<Double>(),
238-
column = path,
239-
)
240-
}
241-
}
242-
} catch (e: TypeConversionException) {
243-
throw CellConversionException(e.value, e.from, e.to, path, currentRow, e)
244-
}
245-
}
246-
247-
return if (locale != null) {
248-
val explicitParser = Parsers.getDoubleParser(locale, useFastDoubleParser)
249-
applyParser(explicitParser)
250-
} else {
251-
try {
252-
val defaultParser = Parsers.getDoubleParser(useFastDoubleParser = useFastDoubleParser)
253-
applyParser(defaultParser)
254-
} catch (e: TypeConversionException) {
255-
val posixParser = Parsers.getDoubleParser(Locale.forLanguageTag("C.UTF-8"), useFastDoubleParser)
256-
applyParser(posixParser)
257-
}
258-
}
259-
}
257+
nullStrings: Set<String>?,
258+
useFastDoubleParser: Boolean?,
259+
): DataColumn<Double?> =
260+
convertToDoubleImpl(
261+
locale = locale,
262+
nullStrings = nullStrings,
263+
useFastDoubleParser = useFastDoubleParser,
264+
)
260265

261266
@JvmName("convertToFloatFromT")
262267
public fun <T : Any> DataColumn<T>.convertToFloat(): DataColumn<Float> = convertTo()

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

+90-9
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,25 @@ import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
1010
import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
1111
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
1212
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
13+
import org.jetbrains.kotlinx.dataframe.io.readCSV
1314
import org.jetbrains.kotlinx.dataframe.typeClass
1415
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
1516
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY
1617
import java.time.format.DateTimeFormatter
1718
import java.util.Locale
1819
import kotlin.reflect.KProperty
20+
import kotlin.reflect.KType
1921

20-
public val DataFrame.Companion.parser: GlobalParserOptions get() = Parsers
22+
/**
23+
* ### Global Parser Options
24+
*
25+
* These options are used to configure how [DataColumns][DataColumn] of type [String] or [String?][String]
26+
* should be parsed.
27+
* You can always pass a [ParserOptions] object to functions that perform parsing, like [tryParse], [parse],
28+
* or even [DataFrame.readCSV][DataFrame.Companion.readCSV] to override these options.
29+
*/
30+
public val DataFrame.Companion.parser: GlobalParserOptions
31+
get() = Parsers
2132

2233
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null, columns: ColumnsSelector<T, Any?>): DataFrame<T> =
2334
parseImpl(options, columns)
@@ -37,14 +48,33 @@ public interface GlobalParserOptions {
3748

3849
public fun addNullString(str: String)
3950

51+
/** This function can be called to skip some types. Parsing will be attempted for all other types. */
52+
public fun addSkipType(type: KType)
53+
54+
/** Whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now. */
55+
public var useFastDoubleParser: Boolean
56+
4057
public fun resetToDefault()
4158

4259
public var locale: Locale
60+
61+
public val nulls: Set<String>
62+
63+
public val skipTypes: Set<KType>
4364
}
4465

4566
/**
4667
* ### Options for parsing [String]`?` columns
4768
*
69+
* These options are used to configure how [DataColumn]s of type [String] or [String?][String] should be parsed.
70+
* They can be passed to [tryParse] and [parse] functions.
71+
*
72+
* You can also use the [DataFrame.parser][DataFrame.Companion.parser] property to access and modify
73+
* the global parser configuration.
74+
*
75+
* If any of the arguments in [ParserOptions] are `null` (or [ParserOptions] itself is `null`),
76+
* the global configuration will be queried.
77+
*
4878
* @param locale locale to use for parsing dates and numbers, defaults to the System default locale.
4979
* If specified instead of [dateTimeFormatter], it will be used in combination with [dateTimePattern]
5080
* to create a [DateTimeFormatter]. Just providing [locale] will not allow you to parse
@@ -55,16 +85,19 @@ public interface GlobalParserOptions {
5585
* @param dateTimePattern a pattern to use for parsing dates. If specified instead of [dateTimeFormatter],
5686
* it will be used to create a [DateTimeFormatter].
5787
* @param nullStrings a set of strings that should be treated as `null` values. By default, it's
58-
* ["null", "NULL", "NA", "N/A"].
88+
* `["null", "NULL", "NA", "N/A"]`.
89+
* @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types.
90+
* By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead.
5991
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
6092
*/
61-
public data class ParserOptions(
62-
val locale: Locale? = null,
93+
public class ParserOptions(
94+
public val locale: Locale? = null,
6395
// TODO, migrate to kotlinx.datetime.format.DateTimeFormat? https://github.com/Kotlin/dataframe/issues/876
64-
val dateTimeFormatter: DateTimeFormatter? = null,
65-
val dateTimePattern: String? = null,
66-
val nullStrings: Set<String>? = null,
67-
val useFastDoubleParser: Boolean = false,
96+
public val dateTimeFormatter: DateTimeFormatter? = null,
97+
public val dateTimePattern: String? = null,
98+
public val nullStrings: Set<String>? = null,
99+
public val skipTypes: Set<KType>? = null,
100+
public val useFastDoubleParser: Boolean? = null,
68101
) {
69102

70103
/** For binary compatibility. */
@@ -82,7 +115,8 @@ public data class ParserOptions(
82115
dateTimeFormatter = dateTimeFormatter,
83116
dateTimePattern = dateTimePattern,
84117
nullStrings = nullStrings,
85-
useFastDoubleParser = false,
118+
skipTypes = null,
119+
useFastDoubleParser = null,
86120
)
87121

88122
/** For binary compatibility. */
@@ -101,6 +135,7 @@ public data class ParserOptions(
101135
dateTimeFormatter = dateTimeFormatter,
102136
dateTimePattern = dateTimePattern,
103137
nullStrings = nullStrings,
138+
skipTypes = skipTypes,
104139
useFastDoubleParser = useFastDoubleParser,
105140
)
106141

@@ -111,6 +146,52 @@ public data class ParserOptions(
111146
dateTimePattern != null -> DateTimeFormatter.ofPattern(dateTimePattern)
112147
else -> null
113148
}
149+
150+
public fun copy(
151+
locale: Locale? = this.locale,
152+
dateTimeFormatter: DateTimeFormatter? = this.dateTimeFormatter,
153+
dateTimePattern: String? = this.dateTimePattern,
154+
nullStrings: Set<String>? = this.nullStrings,
155+
skipTypes: Set<KType>? = this.skipTypes,
156+
useFastDoubleParser: Boolean? = this.useFastDoubleParser,
157+
): ParserOptions =
158+
ParserOptions(
159+
locale = locale,
160+
dateTimeFormatter = dateTimeFormatter,
161+
dateTimePattern = dateTimePattern,
162+
nullStrings = nullStrings,
163+
skipTypes = skipTypes,
164+
useFastDoubleParser = useFastDoubleParser,
165+
)
166+
167+
override fun equals(other: Any?): Boolean {
168+
if (this === other) return true
169+
if (javaClass != other?.javaClass) return false
170+
171+
other as ParserOptions
172+
173+
if (useFastDoubleParser != other.useFastDoubleParser) return false
174+
if (locale != other.locale) return false
175+
if (dateTimeFormatter != other.dateTimeFormatter) return false
176+
if (dateTimePattern != other.dateTimePattern) return false
177+
if (nullStrings != other.nullStrings) return false
178+
if (skipTypes != other.skipTypes) return false
179+
180+
return true
181+
}
182+
183+
override fun hashCode(): Int {
184+
var result = useFastDoubleParser?.hashCode() ?: 0
185+
result = 31 * result + (locale?.hashCode() ?: 0)
186+
result = 31 * result + (dateTimeFormatter?.hashCode() ?: 0)
187+
result = 31 * result + (dateTimePattern?.hashCode() ?: 0)
188+
result = 31 * result + (nullStrings?.hashCode() ?: 0)
189+
result = 31 * result + (skipTypes?.hashCode() ?: 0)
190+
return result
191+
}
192+
193+
override fun toString(): String =
194+
"ParserOptions(locale=$locale, dateTimeFormatter=$dateTimeFormatter, dateTimePattern=$dateTimePattern, nullStrings=$nullStrings, skipTypes=$skipTypes, useFastDoubleParser=$useFastDoubleParser)"
114195
}
115196

116197
/** Tries to parse a column of strings into a column of a different type.

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnNameGenerator.kt

+5-5
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ package org.jetbrains.kotlinx.dataframe.impl
22

33
import org.jetbrains.kotlinx.dataframe.AnyFrame
44

5-
internal class ColumnNameGenerator(columnNames: List<String> = emptyList()) {
5+
public class ColumnNameGenerator(columnNames: List<String> = emptyList()) {
66

77
private val usedNames = columnNames.toMutableSet()
88

99
private val colNames = columnNames.toMutableList()
1010

11-
fun addUnique(preferredName: String): String {
11+
public fun addUnique(preferredName: String): String {
1212
var name = preferredName
1313
var k = 1
1414
while (usedNames.contains(name)) {
@@ -19,17 +19,17 @@ internal class ColumnNameGenerator(columnNames: List<String> = emptyList()) {
1919
return name
2020
}
2121

22-
fun addIfAbsent(name: String) {
22+
public fun addIfAbsent(name: String) {
2323
if (!usedNames.contains(name)) {
2424
usedNames.add(name)
2525
colNames.add(name)
2626
}
2727
}
2828

29-
val names: List<String>
29+
public val names: List<String>
3030
get() = colNames
3131

32-
fun contains(name: String) = usedNames.contains(name)
32+
public operator fun contains(name: String): Boolean = usedNames.contains(name)
3333
}
3434

3535
internal fun AnyFrame.nameGenerator() = ColumnNameGenerator(columnNames())

0 commit comments

Comments
 (0)