@@ -5,24 +5,21 @@ import kotlinx.datetime.LocalDate
5
5
import kotlinx.datetime.LocalDateTime
6
6
import kotlinx.datetime.LocalTime
7
7
import org.apache.commons.csv.CSVFormat
8
- import org.apache.commons.csv.CSVRecord
9
8
import org.apache.commons.io.input.BOMInputStream
10
9
import org.jetbrains.kotlinx.dataframe.AnyFrame
11
10
import org.jetbrains.kotlinx.dataframe.AnyRow
12
- import org.jetbrains.kotlinx.dataframe.DataColumn
13
11
import org.jetbrains.kotlinx.dataframe.DataFrame
14
12
import org.jetbrains.kotlinx.dataframe.DataRow
15
13
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
16
14
import org.jetbrains.kotlinx.dataframe.annotations.OptInRefine
17
15
import org.jetbrains.kotlinx.dataframe.annotations.Refine
18
16
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
19
17
import org.jetbrains.kotlinx.dataframe.api.forEach
20
- import org.jetbrains.kotlinx.dataframe.api.toDataFrame
21
- import org.jetbrains.kotlinx.dataframe.api.tryParse
22
18
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadCsvMethod
23
19
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
24
- import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
25
20
import org.jetbrains.kotlinx.dataframe.impl.api.parse
21
+ import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
22
+ import org.jetbrains.kotlinx.dataframe.io.ColType.String
26
23
import org.jetbrains.kotlinx.dataframe.util.AS_URL
27
24
import org.jetbrains.kotlinx.dataframe.util.AS_URL_IMPORT
28
25
import org.jetbrains.kotlinx.dataframe.util.AS_URL_REPLACE
@@ -46,7 +43,6 @@ import java.nio.charset.Charset
46
43
import java.util.zip.GZIPInputStream
47
44
import kotlin.reflect.KClass
48
45
import kotlin.reflect.KType
49
- import kotlin.reflect.full.withNullability
50
46
import kotlin.reflect.typeOf
51
47
import kotlin.time.Duration
52
48
@@ -354,74 +350,24 @@ public fun DataFrame.Companion.readDelim(
354
350
skipLines : Int = 0,
355
351
readLines : Int? = null,
356
352
parserOptions : ParserOptions ? = null,
357
- ): AnyFrame {
353
+ ): AnyFrame =
358
354
try {
359
- var reader = reader
360
- if (skipLines > 0 ) {
361
- reader = BufferedReader (reader)
362
- repeat(skipLines) { reader.readLine() }
363
- }
364
-
365
- val csvParser = format.parse(reader)
366
- val records = if (readLines == null ) {
367
- csvParser.records
368
- } else {
369
- require(readLines >= 0 ) { " `readLines` must not be negative" }
370
- val records = ArrayList <CSVRecord >(readLines)
371
- val iter = csvParser.iterator()
372
- var count = readLines ? : 0
373
- while (iter.hasNext() && 0 < count-- ) {
374
- records.add(iter.next())
375
- }
376
- records
377
- }
378
-
379
- val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
380
- ? : (1 .. (records.firstOrNull()?.count() ? : 0 )).map { index -> " X$index " }
381
-
382
- val generator = ColumnNameGenerator ()
383
- val uniqueNames = columnNames.map { generator.addUnique(it) }
384
-
385
- val cols = uniqueNames.mapIndexed { colIndex, colName ->
386
- val defaultColType = colTypes[" .default" ]
387
- val colType = colTypes[colName] ? : defaultColType
388
- var hasNulls = false
389
- val values = records.map {
390
- if (it.isSet(colIndex)) {
391
- it[colIndex].ifEmpty {
392
- hasNulls = true
393
- null
394
- }
395
- } else {
396
- hasNulls = true
397
- null
398
- }
399
- }
400
- val column = DataColumn .createValueColumn(colName, values, typeOf<String >().withNullability(hasNulls))
401
- val skipTypes = when {
402
- colType != null ->
403
- // skip all types except the desired type
404
- ParserOptions .allTypesExcept(colType.toKType())
405
-
406
- else ->
407
- // respect the provided parser options
408
- parserOptions?.skipTypes ? : emptySet()
409
- }
410
- val adjustsedParserOptions = (parserOptions ? : ParserOptions ())
411
- .copy(skipTypes = skipTypes)
412
-
413
- return @mapIndexed column.tryParse(adjustsedParserOptions)
414
- }
415
- return cols.toDataFrame()
416
- } catch (e: OutOfMemoryError ) {
355
+ readDelimImpl(
356
+ reader = reader,
357
+ format = format,
358
+ colTypes = colTypes,
359
+ skipLines = skipLines,
360
+ readLines = readLines,
361
+ parserOptions = parserOptions,
362
+ )
363
+ } catch (_: OutOfMemoryError ) {
417
364
throw OutOfMemoryError (
418
365
" Ran out of memory reading this CSV-like file. " +
419
366
" You can try our new experimental CSV reader by adding the dependency " +
420
367
" \" org.jetbrains.kotlinx:dataframe-csv:{VERSION}\" and using `DataFrame.readCsv()` instead of " +
421
368
" `DataFrame.readCSV()`." ,
422
369
)
423
370
}
424
- }
425
371
426
372
/**
 * Writes this frame as CSV into [file].
 *
 * A [FileWriter] is opened on [file] (no explicit charset is passed to it) and the
 * actual writing is delegated to the writer-based `writeCSV` overload.
 *
 * @param file destination file the [FileWriter] is created for.
 * @param format CSV format handed through to the delegate; defaults to [CSVFormat.DEFAULT].
 */
public fun AnyFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT) {
    val fileWriter = FileWriter(file)
    writeCSV(fileWriter, format)
}
0 commit comments