Skip to content

Commit 766cad1

Browse files
committed
added Path support for new csv reader
1 parent 6e34b73 commit 766cad1

File tree

11 files changed

+293
-9
lines changed

11 files changed

+293
-9
lines changed

dataframe-csv/api/dataframe-csv.api

Lines changed: 13 additions & 0 deletions
Large diffs are not rendered by default.

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonReadDelimDocs.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import java.net.URL
2222
*
2323
* ##### Similar Functions
2424
* With the overloads of $[FunctionLinkArg]`()`, you can read any $[FileTypeArg] by [File][File],
25-
* [URL][URL], or [InputStream][InputStream].
25+
* [Path][java.nio.file.Path], [URL][URL], or [InputStream][InputStream].
2626
* Reading by file path or URL can also be done by passing a [String].
2727
*
2828
* For example, $[FunctionLinkArg]`("input.$[CommonReadDelimDocs.FileExtensionArg]")` or with some options:

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonWriteDelimDocs.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import java.io.File
1111
/**
1212
* ### $[WriteOrConvertArg] [DataFrame] to $[FileTypeTitleArg] $[DataTitleArg]
1313
*
14-
* ${[WriteOrConvertArg]}s [this] [DataFrame] to a $[FileTypeArg] $[DataArg].
14+
* ${[WriteOrConvertArg]}s \[this\]\[this\] [DataFrame][DataFrame] to a $[FileTypeArg] $[DataArg].
1515
*
1616
* Parameters you can use to customize the process include, for instance, \[delimiter\],
1717
* \[includeHeader\], \[quoteMode\], and \[headerComments\].
@@ -20,7 +20,7 @@ import java.io.File
2020
* The integration is built upon {@include [DocumentationUrls.ApacheCsv]}.
2121
*
2222
* ##### Similar Functions
23-
* With overloads of $[FunctionLinkArg]`()`, you can write $[FileTypeArg] to [File][File],
23+
* With overloads of $[FunctionLinkArg]`()`, you can write $[FileTypeArg] to [File][File], [Path][java.nio.file.Path],
2424
* [Appendable], or [String].
2525
*
2626
* For example, $[FunctionLinkArg]`("output.$[CommonWriteDelimDocs.FileExtensionArg]")`

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ import org.jetbrains.kotlinx.dataframe.io.QuoteMode
1717
@Suppress("ktlint:standard:class-naming", "ClassName", "KDocUnresolvedReference")
1818
internal object DelimParams {
1919

20+
/** @param path The path of the file to read. The file can also be compressed as `.gz` or `.zip`, see [Compression]. */
21+
interface PATH_READ
22+
2023
/** @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. */
2124
interface FILE_READ
2225

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.io
33
import java.io.File
44
import java.io.InputStream
55
import java.net.URL
6+
import java.nio.file.Path
67
import java.util.zip.GZIPInputStream
78
import java.util.zip.InflaterInputStream
89
import java.util.zip.ZipInputStream
@@ -97,11 +98,8 @@ public fun compressionStateOf(fileOrUrl: String): Compression<*> =
9798
else -> Compression.None
9899
}
99100

100-
public fun compressionStateOf(file: File): Compression<*> =
101-
when (file.extension) {
102-
"gz" -> Compression.Gzip
103-
"zip" -> Compression.Zip
104-
else -> Compression.None
105-
}
101+
/** Determines the [Compression] for [file] by delegating to the `String` overload with the file's name. */
public fun compressionStateOf(file: File): Compression<*> {
    val fileName = file.name
    return compressionStateOf(fileName)
}
102+
103+
/**
 * Determines the [Compression] for [path] by delegating to the `String` overload with the
 * path's last name element. A path without a file-name element is treated as an empty name.
 */
public fun compressionStateOf(path: Path): Compression<*> {
    val fileName = path.fileName?.toString().orEmpty()
    return compressionStateOf(fileName)
}
106104

107105
/** Determines the [Compression] for [url] by delegating to the `String` overload with the URL's path component. */
public fun compressionStateOf(url: URL): Compression<*> {
    val urlPath = url.path
    return compressionStateOf(urlPath)
}

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsv.kt

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.IGNORE_SURROUND
2222
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.INPUT_STREAM_READ
2323
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.PARSER_OPTIONS
2424
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.PARSE_PARALLEL
25+
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.PATH_READ
2526
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.QUOTE
2627
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.READ_LINES
2728
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.SKIP_LINES
@@ -31,6 +32,59 @@ import java.io.File
3132
import java.io.FileInputStream
3233
import java.io.InputStream
3334
import java.net.URL
35+
import java.nio.file.Path
36+
import kotlin.io.path.inputStream
37+
38+
/**
 * @include [CommonReadDelimDocs.CsvDocs]
 * @set [CommonReadDelimDocs.DataTitleArg] File
 * @set [CommonReadDelimDocs.DataArg] file
 * @include [PATH_READ]
 * @include [CSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    compression: Compression<*> = compressionStateOf(path),
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> =
    path.inputStream().use { stream ->
        // The stream opened on `path` is closed by `use` when reading completes or fails.
        // The raw stream and the `compression` setting are both forwarded to `readDelimImpl`.
        readDelimImpl(
            inputStream = stream,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            compression = compression,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
3488

3589
/**
3690
* @include [CommonReadDelimDocs.CsvDocs]

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelim.kt

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.IGNORE_SURROUND
2222
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.INPUT_STREAM_READ
2323
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.PARSER_OPTIONS
2424
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.PARSE_PARALLEL
25+
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.PATH_READ
2526
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.QUOTE
2627
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.READ_LINES
2728
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.SKIP_LINES
@@ -31,13 +32,66 @@ import java.io.File
3132
import java.io.FileInputStream
3233
import java.io.InputStream
3334
import java.net.URL
35+
import java.nio.file.Path
36+
import kotlin.io.path.inputStream
3437

3538
/*
3639
* TODO these currently clash with :core's readDelim(Str) functions.
3740
* When those are deprecated, we can let the users fall back to these.
3841
* They do the same as readCsv(Str).
3942
*/
4043

44+
/**
 * @include [CommonReadDelimDocs.DelimDocs]
 * @set [CommonReadDelimDocs.DataTitleArg] File
 * @set [CommonReadDelimDocs.DataArg] file
 * @include [PATH_READ]
 * @include [DELIM_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    compression: Compression<*> = compressionStateOf(path),
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> =
    path.inputStream().use { stream ->
        // The stream opened on `path` is closed by `use` when reading completes or fails.
        // The raw stream and the `compression` setting are both forwarded to `readDelimImpl`.
        readDelimImpl(
            inputStream = stream,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            compression = compression,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
94+
4195
/**
4296
* @include [CommonReadDelimDocs.DelimDocs]
4397
* @set [CommonReadDelimDocs.DataTitleArg] File

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsv.kt

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.IGNORE_SURROUND
2121
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.INPUT_STREAM_READ
2222
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.PARSER_OPTIONS
2323
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.PARSE_PARALLEL
24+
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.PATH_READ
2425
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.QUOTE
2526
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.READ_LINES
2627
import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.SKIP_LINES
@@ -31,6 +32,59 @@ import java.io.File
3132
import java.io.FileInputStream
3233
import java.io.InputStream
3334
import java.net.URL
35+
import java.nio.file.Path
36+
import kotlin.io.path.inputStream
37+
38+
/**
 * @include [CommonReadDelimDocs.TsvDocs]
 * @set [CommonReadDelimDocs.DataTitleArg] File
 * @set [CommonReadDelimDocs.DataArg] file
 * @include [PATH_READ]
 * @include [TSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    compression: Compression<*> = compressionStateOf(path),
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> =
    path.inputStream().use { stream ->
        // The stream opened on `path` is closed by `use` when reading completes or fails.
        // The raw stream and the `compression` setting are both forwarded to `readDelimImpl`.
        readDelimImpl(
            inputStream = stream,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            compression = compression,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
3488

3589
/**
3690
* @include [CommonReadDelimDocs.TsvDocs]

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeCsv.kt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,42 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.WRITER_WRITE
1919
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
2020
import java.io.File
2121
import java.io.FileWriter
22+
import java.nio.file.Path
23+
import kotlin.io.path.writer
24+
25+
// NOTE(review): the DelimParams changes visible in this commit only add PATH_READ;
// confirm that PATH_WRITE exists and is imported here, otherwise the @include below will not resolve.
/**
 * @include [CommonWriteDelimDocs.CsvDocs]
 * @set [CommonWriteDelimDocs.WriteOrConvertArg] Write
 * @set [CommonWriteDelimDocs.DataTitleArg] File
 * @set [CommonWriteDelimDocs.DataArg] file
 * @include [PATH_WRITE]
 * @include [CSV_DELIMITER]
 * @include [CommonWriteDelimDocs.CommonWriteParams]
 */
public fun AnyFrame.writeCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    path.writer().use { writer ->
        // `use` guarantees the writer opened on `path` is closed even if writing fails part-way,
        // mirroring how the Path-based read overloads manage their input stream.
        // Closing an already-closed writer is a no-op, so this is safe if the callee closes it too.
        writeDelimImpl(
            df = this,
            writer = writer,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
2258

2359
/**
2460
* @include [CommonWriteDelimDocs.CsvDocs]

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeDelim.kt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,42 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.WRITER_WRITE
1919
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
2020
import java.io.File
2121
import java.io.FileWriter
22+
import java.nio.file.Path
23+
import kotlin.io.path.writer
24+
25+
// NOTE(review): the DelimParams changes visible in this commit only add PATH_READ;
// confirm that PATH_WRITE exists and is imported here, otherwise the @include below will not resolve.
/**
 * @include [CommonWriteDelimDocs.DelimDocs]
 * @set [CommonWriteDelimDocs.WriteOrConvertArg] Write
 * @set [CommonWriteDelimDocs.DataTitleArg] File
 * @set [CommonWriteDelimDocs.DataArg] file
 * @include [PATH_WRITE]
 * @include [DELIM_DELIMITER]
 * @include [CommonWriteDelimDocs.CommonWriteParams]
 */
public fun AnyFrame.writeDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    path.writer().use { writer ->
        // `use` guarantees the writer opened on `path` is closed even if writing fails part-way,
        // mirroring how the Path-based read overloads manage their input stream.
        // Closing an already-closed writer is a no-op, so this is safe if the callee closes it too.
        writeDelimImpl(
            df = this,
            writer = writer,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
2258

2359
/**
2460
* @include [CommonWriteDelimDocs.DelimDocs]

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeTsv.kt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,42 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.WRITER_WRITE
1919
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
2020
import java.io.File
2121
import java.io.FileWriter
22+
import java.nio.file.Path
23+
import kotlin.io.path.writer
24+
25+
// NOTE(review): the DelimParams changes visible in this commit only add PATH_READ;
// confirm that PATH_WRITE exists and is imported here, otherwise the @include below will not resolve.
/**
 * @include [CommonWriteDelimDocs.TsvDocs]
 * @set [CommonWriteDelimDocs.WriteOrConvertArg] Write
 * @set [CommonWriteDelimDocs.DataTitleArg] File
 * @set [CommonWriteDelimDocs.DataArg] file
 * @include [PATH_WRITE]
 * @include [TSV_DELIMITER]
 * @include [CommonWriteDelimDocs.CommonWriteParams]
 */
public fun AnyFrame.writeTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    path.writer().use { writer ->
        // `use` guarantees the writer opened on `path` is closed even if writing fails part-way,
        // mirroring how the Path-based read overloads manage their input stream.
        // Closing an already-closed writer is a no-op, so this is safe if the callee closes it too.
        writeDelimImpl(
            df = this,
            writer = writer,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
2258

2359
/**
2460
* @include [CommonWriteDelimDocs.TsvDocs]

0 commit comments

Comments
 (0)