Skip to content

Commit 21c91f6

Browse files
committed
Merge branch 'refs/heads/master' into new-csv-implementation
2 parents 54f3d18 + a8cee48 commit 21c91f6

File tree

4 files changed

+45
-47
lines changed

4 files changed

+45
-47
lines changed

core/build.gradle.kts

-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ dependencies {
7474
implementation(libs.fuel)
7575

7676
api(libs.kotlin.datetimeJvm)
77-
implementation(libs.kotlin.coroutinesCore)
7877
implementation(libs.kotlinpoet)
7978
implementation(libs.sl4j)
8079
implementation(libs.kotlinLogging)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

+17
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
77
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
88
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
99
import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
10+
import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
1011
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
1112
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
1213
import org.jetbrains.kotlinx.dataframe.typeClass
@@ -86,13 +87,29 @@ public data class ParserOptions(
8687
}
8788
}
8889

90+
/** @include [tryParseImpl] */
8991
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)
9092

9193
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
9294
parse(options) {
9395
colsAtAnyDepth { !it.isColumnGroup() }
9496
}
9597

98+
/**
99+
* Tries to parse a column of strings into a column of a different type.
100+
* Each parser in [Parsers] is run in order until a valid parser is found,
101+
* i.e., that parser was able to parse all values in the column successfully. If a parser
102+
* fails to parse even a single value, the next parser is tried.
103+
*
104+
* If all parsers fail, an [IllegalStateException] is thrown. If you don't want this exception to be thrown,
105+
* use [tryParse] instead.
106+
*
107+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
108+
*
109+
* @param options options for parsing, like providing a locale or a custom date-time formatter
110+
* @throws IllegalStateException if no valid parser is found
111+
* @return a new column with parsed values
112+
*/
96113
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> =
97114
tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") }
98115

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

+28-44
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
package org.jetbrains.kotlinx.dataframe.impl.api
22

3-
import kotlinx.coroutines.async
4-
import kotlinx.coroutines.awaitAll
5-
import kotlinx.coroutines.coroutineScope
6-
import kotlinx.coroutines.runBlocking
73
import kotlinx.datetime.Instant
84
import kotlinx.datetime.LocalDate
95
import kotlinx.datetime.LocalDateTime
@@ -527,44 +523,32 @@ internal fun <T> DataColumn<String?>.parse(parser: StringParser<T>, options: Par
527523
return DataColumn.createValueColumn(name(), parsedValues, parser.type.withNullability(hasNulls)) as DataColumn<T?>
528524
}
529525

530-
internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: ColumnsSelector<T, Any?>): DataFrame<T> =
531-
runBlocking { parseParallel(options, columns) }
532-
533-
private suspend fun <T> DataFrame<T>.parseParallel(
534-
options: ParserOptions?,
535-
columns: ColumnsSelector<T, Any?>,
536-
): DataFrame<T> =
537-
coroutineScope {
538-
val convertedCols = getColumnsWithPaths(columns).map { col ->
539-
async {
540-
when {
541-
// when a frame column is requested to be parsed,
542-
// parse each value/frame column at any depth inside each DataFrame in the frame column
543-
col.isFrameColumn() ->
544-
col.values.map {
545-
async {
546-
it.parseParallel(options) {
547-
colsAtAnyDepth { !it.isColumnGroup() }
548-
}
549-
}
550-
}.awaitAll()
551-
.toColumn(col.name)
552-
553-
// when a column group is requested to be parsed,
554-
// parse each column in the group
555-
col.isColumnGroup() ->
556-
col.parseParallel(options) { all() }
557-
.asColumnGroup(col.name())
558-
.asDataColumn()
559-
560-
// Base case, parse the column if it's a `String?` column
561-
col.isSubtypeOf<String?>() ->
562-
col.cast<String?>().tryParse(options)
563-
564-
else -> col
565-
}.let { ColumnToInsert(col.path, it) }
566-
}
567-
}.awaitAll()
568-
569-
emptyDataFrame<T>().insertImpl(convertedCols)
526+
internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: ColumnsSelector<T, Any?>): DataFrame<T> {
527+
val convertedCols = getColumnsWithPaths(columns).map { col ->
528+
when {
529+
// when a frame column is requested to be parsed,
530+
// parse each value/frame column at any depth inside each DataFrame in the frame column
531+
col.isFrameColumn() ->
532+
col.values.map {
533+
it.parseImpl(options) {
534+
colsAtAnyDepth { !it.isColumnGroup() }
535+
}
536+
}.toColumn(col.name)
537+
538+
// when a column group is requested to be parsed,
539+
// parse each column in the group
540+
col.isColumnGroup() ->
541+
col.parseImpl(options) { all() }
542+
.asColumnGroup(col.name())
543+
.asDataColumn()
544+
545+
// Base case, parse the column if it's a `String?` column
546+
col.isSubtypeOf<String?>() ->
547+
col.cast<String?>().tryParse(options)
548+
549+
else -> col
550+
}.let { ColumnToInsert(col.path, it) }
570551
}
552+
553+
return emptyDataFrame<T>().insertImpl(convertedCols)
554+
}

gradle/libs.versions.toml

-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ postgresql = "42.7.4"
3333
sqlite = "3.46.1.0"
3434
jtsCore = "1.19.0"
3535
kotlinDatetime = "0.6.1"
36-
coroutines = "1.9.0"
3736
openapi = "2.1.22"
3837
kotlinLogging = "7.0.0"
3938
sl4j = "2.0.16"
@@ -90,7 +89,6 @@ jts = { group = "org.locationtech.jts", name = "jts-core", version.ref = "jtsCor
9089

9190
poi-ooxml = { group = "org.apache.poi", name = "poi-ooxml", version.ref = "poi" }
9291
kotlin-datetimeJvm = { group = "org.jetbrains.kotlinx", name = "kotlinx-datetime-jvm", version.ref = "kotlinDatetime" }
93-
kotlin-coroutinesCore = { group = "org.jetbrains.kotlinx", name = "kotlinx-coroutines-core", version.ref = "coroutines" }
9492

9593
junit = { group = "junit", name = "junit", version.ref = "junit" }
9694

0 commit comments

Comments
 (0)