Skip to content

Commit 0b0776c

Browse files
committed
added OOM message pointing to new csv implementation
1 parent d40fe77 commit 0b0776c

File tree

1 file changed

+54
-45
lines changed
  • core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io

1 file changed

+54
-45
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

+54-45
Original file line numberDiff line numberDiff line change
@@ -351,61 +351,70 @@ public fun DataFrame.Companion.readDelim(
351351
readLines: Int? = null,
352352
parserOptions: ParserOptions? = null,
353353
): AnyFrame {
354-
var reader = reader
355-
if (skipLines > 0) {
356-
reader = BufferedReader(reader)
357-
repeat(skipLines) { reader.readLine() }
358-
}
359-
360-
val csvParser = format.parse(reader)
361-
val records = if (readLines == null) {
362-
csvParser.records
363-
} else {
364-
require(readLines >= 0) { "`readLines` must not be negative" }
365-
val records = ArrayList<CSVRecord>(readLines)
366-
val iter = csvParser.iterator()
367-
var count = readLines ?: 0
368-
while (iter.hasNext() && 0 < count--) {
369-
records.add(iter.next())
354+
try {
355+
var reader = reader
356+
if (skipLines > 0) {
357+
reader = BufferedReader(reader)
358+
repeat(skipLines) { reader.readLine() }
370359
}
371-
records
372-
}
373-
374-
val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
375-
?: (1..(records.firstOrNull()?.count() ?: 0)).map { index -> "X$index" }
376360

377-
val generator = ColumnNameGenerator()
378-
val uniqueNames = columnNames.map { generator.addUnique(it) }
361+
val csvParser = format.parse(reader)
362+
val records = if (readLines == null) {
363+
csvParser.records
364+
} else {
365+
require(readLines >= 0) { "`readLines` must not be negative" }
366+
val records = ArrayList<CSVRecord>(readLines)
367+
val iter = csvParser.iterator()
368+
var count = readLines ?: 0
369+
while (iter.hasNext() && 0 < count--) {
370+
records.add(iter.next())
371+
}
372+
records
373+
}
379374

380-
val cols = uniqueNames.mapIndexed { colIndex, colName ->
381-
val defaultColType = colTypes[".default"]
382-
val colType = colTypes[colName] ?: defaultColType
383-
var hasNulls = false
384-
val values = records.map {
385-
if (it.isSet(colIndex)) {
386-
it[colIndex].ifEmpty {
375+
val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
376+
?: (1..(records.firstOrNull()?.count() ?: 0)).map { index -> "X$index" }
377+
378+
val generator = ColumnNameGenerator()
379+
val uniqueNames = columnNames.map { generator.addUnique(it) }
380+
381+
val cols = uniqueNames.mapIndexed { colIndex, colName ->
382+
val defaultColType = colTypes[".default"]
383+
val colType = colTypes[colName] ?: defaultColType
384+
var hasNulls = false
385+
val values = records.map {
386+
if (it.isSet(colIndex)) {
387+
it[colIndex].ifEmpty {
388+
hasNulls = true
389+
null
390+
}
391+
} else {
387392
hasNulls = true
388393
null
389394
}
390-
} else {
391-
hasNulls = true
392-
null
393395
}
394-
}
395-
val column = DataColumn.createValueColumn(colName, values, typeOf<String>().withNullability(hasNulls))
396-
when (colType) {
397-
null -> column.tryParse(parserOptions)
398-
399-
else -> {
400-
column.tryParse(
401-
(parserOptions ?: ParserOptions()).copy(
402-
skipTypes = ParserOptions.allTypesExcept(colType.toKType()),
403-
),
404-
)
396+
val column = DataColumn.createValueColumn(colName, values, typeOf<String>().withNullability(hasNulls))
397+
when (colType) {
398+
null -> column.tryParse(parserOptions)
399+
400+
else -> {
401+
column.tryParse(
402+
(parserOptions ?: ParserOptions()).copy(
403+
skipTypes = ParserOptions.allTypesExcept(colType.toKType()),
404+
),
405+
)
406+
}
405407
}
406408
}
409+
return cols.toDataFrame()
410+
} catch (e: OutOfMemoryError) {
411+
throw OutOfMemoryError(
412+
"Ran out of memory reading this CSV-like file. " +
413+
"You can try our new experimental CSV reader by adding the dependency " +
414+
"\"org.jetbrains.kotlinx:dataframe-csv:{VERSION}\" and using `DataFrame.readCsv()` instead of " +
415+
"`DataFrame.readCSV()`. This requires `@OptIn(ExperimentalCsv::class)`.",
416+
)
407417
}
408-
return cols.toDataFrame()
409418
}
410419

411420
public fun AnyFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT): Unit =

0 commit comments

Comments
 (0)