@@ -5,24 +5,20 @@ import kotlinx.datetime.LocalDate
5
5
import kotlinx.datetime.LocalDateTime
6
6
import kotlinx.datetime.LocalTime
7
7
import org.apache.commons.csv.CSVFormat
8
- import org.apache.commons.csv.CSVRecord
9
8
import org.apache.commons.io.input.BOMInputStream
10
9
import org.jetbrains.kotlinx.dataframe.AnyFrame
11
10
import org.jetbrains.kotlinx.dataframe.AnyRow
12
- import org.jetbrains.kotlinx.dataframe.DataColumn
13
11
import org.jetbrains.kotlinx.dataframe.DataFrame
14
12
import org.jetbrains.kotlinx.dataframe.DataRow
15
13
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
16
14
import org.jetbrains.kotlinx.dataframe.annotations.OptInRefine
17
15
import org.jetbrains.kotlinx.dataframe.annotations.Refine
18
16
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
19
17
import org.jetbrains.kotlinx.dataframe.api.forEach
20
- import org.jetbrains.kotlinx.dataframe.api.toDataFrame
21
- import org.jetbrains.kotlinx.dataframe.api.tryParse
22
18
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadCsvMethod
23
19
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
24
- import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
25
20
import org.jetbrains.kotlinx.dataframe.impl.api.parse
21
+ import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
26
22
import org.jetbrains.kotlinx.dataframe.util.AS_URL
27
23
import org.jetbrains.kotlinx.dataframe.util.AS_URL_IMPORT
28
24
import org.jetbrains.kotlinx.dataframe.util.AS_URL_REPLACE
@@ -46,7 +42,6 @@ import java.nio.charset.Charset
46
42
import java.util.zip.GZIPInputStream
47
43
import kotlin.reflect.KClass
48
44
import kotlin.reflect.KType
49
- import kotlin.reflect.full.withNullability
50
45
import kotlin.reflect.typeOf
51
46
import kotlin.time.Duration
52
47
@@ -354,74 +349,24 @@ public fun DataFrame.Companion.readDelim(
354
349
skipLines : Int = 0,
355
350
readLines : Int? = null,
356
351
parserOptions : ParserOptions ? = null,
357
- ): AnyFrame {
352
+ ): AnyFrame =
358
353
try {
359
- var reader = reader
360
- if (skipLines > 0 ) {
361
- reader = BufferedReader (reader)
362
- repeat(skipLines) { reader.readLine() }
363
- }
364
-
365
- val csvParser = format.parse(reader)
366
- val records = if (readLines == null ) {
367
- csvParser.records
368
- } else {
369
- require(readLines >= 0 ) { " `readLines` must not be negative" }
370
- val records = ArrayList <CSVRecord >(readLines)
371
- val iter = csvParser.iterator()
372
- var count = readLines ? : 0
373
- while (iter.hasNext() && 0 < count-- ) {
374
- records.add(iter.next())
375
- }
376
- records
377
- }
378
-
379
- val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
380
- ? : (1 .. (records.firstOrNull()?.count() ? : 0 )).map { index -> " X$index " }
381
-
382
- val generator = ColumnNameGenerator ()
383
- val uniqueNames = columnNames.map { generator.addUnique(it) }
384
-
385
- val cols = uniqueNames.mapIndexed { colIndex, colName ->
386
- val defaultColType = colTypes[" .default" ]
387
- val colType = colTypes[colName] ? : defaultColType
388
- var hasNulls = false
389
- val values = records.map {
390
- if (it.isSet(colIndex)) {
391
- it[colIndex].ifEmpty {
392
- hasNulls = true
393
- null
394
- }
395
- } else {
396
- hasNulls = true
397
- null
398
- }
399
- }
400
- val column = DataColumn .createValueColumn(colName, values, typeOf<String >().withNullability(hasNulls))
401
- val skipTypes = when {
402
- colType != null ->
403
- // skip all types except the desired type
404
- ParserOptions .allTypesExcept(colType.toKType())
405
-
406
- else ->
407
- // respect the provided parser options
408
- parserOptions?.skipTypes ? : emptySet()
409
- }
410
- val adjustsedParserOptions = (parserOptions ? : ParserOptions ())
411
- .copy(skipTypes = skipTypes)
412
-
413
- return @mapIndexed column.tryParse(adjustsedParserOptions)
414
- }
415
- return cols.toDataFrame()
416
- } catch (e: OutOfMemoryError ) {
354
+ readDelimImpl(
355
+ reader = reader,
356
+ format = format,
357
+ colTypes = colTypes,
358
+ skipLines = skipLines,
359
+ readLines = readLines,
360
+ parserOptions = parserOptions,
361
+ )
362
+ } catch (_: OutOfMemoryError ) {
417
363
throw OutOfMemoryError (
418
364
" Ran out of memory reading this CSV-like file. " +
419
365
" You can try our new experimental CSV reader by adding the dependency " +
420
366
" \" org.jetbrains.kotlinx:dataframe-csv:{VERSION}\" and using `DataFrame.readCsv()` instead of " +
421
367
" `DataFrame.readCSV()`." ,
422
368
)
423
369
}
424
- }
425
370
426
371
/**
 * Writes this frame as CSV into [file].
 *
 * Opens a [FileWriter] on [file] (which replaces any existing content of that file)
 * and hands it, together with [format], to the writer-based `writeCSV` overload.
 *
 * NOTE(review): this overload does not close the [FileWriter] itself; presumably the
 * writer-based overload takes care of flushing/closing it — confirm against that overload.
 *
 * @param file destination file for the CSV output.
 * @param format CSV dialect used for printing; defaults to [CSVFormat.DEFAULT].
 */
public fun AnyFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT): Unit =
    FileWriter(file).let { target -> writeCSV(target, format) }
0 commit comments