diff --git a/build.gradle.kts b/build.gradle.kts index 13e2c3a44b..18b12e0a66 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -55,8 +55,7 @@ dependencies { api(project(":dataframe-excel")) api(project(":dataframe-openapi")) api(project(":dataframe-jdbc")) - // TODO enable when it leaves the experimental phase - // api(project(":dataframe-csv")) + api(project(":dataframe-csv")) kover(project(":core")) kover(project(":dataframe-arrow")) diff --git a/core/api/core.api b/core/api/core.api index 6d27b97373..d8a32259c7 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -2646,57 +2646,57 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/ColumnDescri } public final class org/jetbrains/kotlinx/dataframe/api/ColumnDescription_ExtensionsKt { - public static final fun ColumnDescription_count (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_count (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_count (Lorg/jetbrains/kotlinx/dataframe/DataRow;)I - public static final fun ColumnDescription_freq (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_freq (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_freq (Lorg/jetbrains/kotlinx/dataframe/DataRow;)I - public static final fun ColumnDescription_max (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_max (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_max (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun ColumnDescription_mean 
(Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_mean (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_mean (Lorg/jetbrains/kotlinx/dataframe/DataRow;)D - public static final fun ColumnDescription_median (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_median (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_median (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun ColumnDescription_min (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_min (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_min (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun ColumnDescription_name (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_name (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_name (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; - public static final fun ColumnDescription_nulls (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_nulls (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_nulls (Lorg/jetbrains/kotlinx/dataframe/DataRow;)I - public static final fun ColumnDescription_path 
(Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_path (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_path (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath; - public static final fun ColumnDescription_std (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_std (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_std (Lorg/jetbrains/kotlinx/dataframe/DataRow;)D - public static final fun ColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun ColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; - public static final fun ColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/DataRow;)I - public static final fun NullableColumnDescription_count 
(Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_count (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_count (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Integer; - public static final fun NullableColumnDescription_freq (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_freq (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_freq (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Integer; - public static final fun NullableColumnDescription_max (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_max (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_max (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun NullableColumnDescription_mean (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_mean (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_mean (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Double; - public static final fun NullableColumnDescription_median (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_median (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_median 
(Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun NullableColumnDescription_min (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_min (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_min (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun NullableColumnDescription_name (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_name (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_name (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; - public static final fun NullableColumnDescription_nulls (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_nulls (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_nulls (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Integer; - public static final fun NullableColumnDescription_path (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_path (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_path (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath; - public static final fun NullableColumnDescription_std (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_std 
(Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_std (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Double; - public static final fun NullableColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun NullableColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; - public static final fun NullableColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Integer; } @@ -5799,13 +5799,13 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/KeyValueProp } public final class org/jetbrains/kotlinx/dataframe/api/KeyValueProperty_ExtensionsKt { - public static final fun KeyValueProperty_key (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun KeyValueProperty_key (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun 
KeyValueProperty_key (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; - public static final fun KeyValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun KeyValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun KeyValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun NullableKeyValueProperty_key (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableKeyValueProperty_key (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableKeyValueProperty_key (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; - public static final fun NullableKeyValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableKeyValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableKeyValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; } @@ -6299,13 +6299,13 @@ public final class org/jetbrains/kotlinx/dataframe/api/NameValuePair { } public final class org/jetbrains/kotlinx/dataframe/api/NameValuePair_ExtensionsKt { - public static final fun NameValuePair_name (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NameValuePair_name (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NameValuePair_name (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; - public static final fun NameValuePair_value 
(Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NameValuePair_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NameValuePair_value (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun NullableNameValuePair_name (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableNameValuePair_name (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableNameValuePair_name (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; - public static final fun NullableNameValuePair_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableNameValuePair_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableNameValuePair_value (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; } @@ -8859,9 +8859,9 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/ValueCount { } public final class org/jetbrains/kotlinx/dataframe/api/ValueCount_ExtensionsKt { - public static final fun NullableValueCount_count (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableValueCount_count (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableValueCount_count (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Integer; - public static final fun ValueCount_count (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ValueCount_count 
(Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ValueCount_count (Lorg/jetbrains/kotlinx/dataframe/DataRow;)I } @@ -8883,9 +8883,9 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/ValuePropert } public final class org/jetbrains/kotlinx/dataframe/api/ValueProperty_ExtensionsKt { - public static final fun NullableValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun NullableValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; - public static final fun ValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun ValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/ColumnsScope;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ValueProperty_value (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; } @@ -10203,6 +10203,65 @@ public final class org/jetbrains/kotlinx/dataframe/io/CommonKt { public static final fun urlAsFile (Ljava/net/URL;)Ljava/io/File; } +public abstract interface class org/jetbrains/kotlinx/dataframe/io/Compression { + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Compression$Companion; + public abstract fun doFinally (Ljava/io/InputStream;)V + public abstract fun doFirst (Ljava/io/InputStream;)V + public abstract fun wrapStream (Ljava/io/InputStream;)Ljava/io/InputStream; +} + +public final class org/jetbrains/kotlinx/dataframe/io/Compression$Companion { + public final fun of (Ljava/io/File;)Lorg/jetbrains/kotlinx/dataframe/io/Compression; + public final fun of (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/Compression; + public final fun of 
(Ljava/net/URL;)Lorg/jetbrains/kotlinx/dataframe/io/Compression; + public final fun of (Ljava/nio/file/Path;)Lorg/jetbrains/kotlinx/dataframe/io/Compression; +} + +public final class org/jetbrains/kotlinx/dataframe/io/Compression$DefaultImpls { + public static fun doFinally (Lorg/jetbrains/kotlinx/dataframe/io/Compression;Ljava/io/InputStream;)V + public static fun doFirst (Lorg/jetbrains/kotlinx/dataframe/io/Compression;Ljava/io/InputStream;)V +} + +public final class org/jetbrains/kotlinx/dataframe/io/Compression$Gzip : org/jetbrains/kotlinx/dataframe/io/Compression { + public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$Gzip; + public synthetic fun doFinally (Ljava/io/InputStream;)V + public fun doFinally (Ljava/util/zip/GZIPInputStream;)V + public synthetic fun doFirst (Ljava/io/InputStream;)V + public fun doFirst (Ljava/util/zip/GZIPInputStream;)V + public fun equals (Ljava/lang/Object;)Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; + public synthetic fun wrapStream (Ljava/io/InputStream;)Ljava/io/InputStream; + public fun wrapStream (Ljava/io/InputStream;)Ljava/util/zip/GZIPInputStream; +} + +public final class org/jetbrains/kotlinx/dataframe/io/Compression$None : org/jetbrains/kotlinx/dataframe/io/Compression { + public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$None; + public fun doFinally (Ljava/io/InputStream;)V + public fun doFirst (Ljava/io/InputStream;)V + public fun equals (Ljava/lang/Object;)Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; + public fun wrapStream (Ljava/io/InputStream;)Ljava/io/InputStream; +} + +public final class org/jetbrains/kotlinx/dataframe/io/Compression$Zip : org/jetbrains/kotlinx/dataframe/io/Compression { + public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$Zip; + public synthetic fun doFinally (Ljava/io/InputStream;)V + public fun doFinally (Ljava/util/zip/ZipInputStream;)V + public 
synthetic fun doFirst (Ljava/io/InputStream;)V + public fun doFirst (Ljava/util/zip/ZipInputStream;)V + public fun equals (Ljava/lang/Object;)Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; + public synthetic fun wrapStream (Ljava/io/InputStream;)Ljava/io/InputStream; + public fun wrapStream (Ljava/io/InputStream;)Ljava/util/zip/ZipInputStream; +} + +public final class org/jetbrains/kotlinx/dataframe/io/CompressionKt { + public static final fun useDecompressed (Ljava/io/InputStream;Lorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object; +} + public final class org/jetbrains/kotlinx/dataframe/io/CsvKt { public static final fun asURL (Ljava/lang/String;)Ljava/net/URL; public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/util/Map;ILjava/lang/Integer;ZLjava/nio/charset/Charset;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; @@ -10219,7 +10278,7 @@ public final class org/jetbrains/kotlinx/dataframe/io/CsvKt { public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/Reader;Lorg/apache/commons/csv/CSVFormat;Ljava/util/Map;ILjava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLorg/jetbrains/kotlinx/dataframe/io/CSVType;Ljava/util/Map;ILjava/lang/Integer;ZLjava/nio/charset/Charset;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/Reader;Lorg/apache/commons/csv/CSVFormat;Ljava/util/Map;ILjava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static 
final fun readDelimStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/Map;ILjava/lang/Integer;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final synthetic fun readDelimStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/Map;ILjava/lang/Integer;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readDelimStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/Map;ILjava/lang/Integer;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun toCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lorg/apache/commons/csv/CSVFormat;)Ljava/lang/String; public static synthetic fun toCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lorg/apache/commons/csv/CSVFormat;ILjava/lang/Object;)Ljava/lang/String; diff --git a/core/build.gradle.kts b/core/build.gradle.kts index d519249af9..a34789cc5a 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -85,6 +85,9 @@ dependencies { testImplementation(libs.kotlin.scriptingJvm) testImplementation(libs.jsoup) testImplementation(libs.sl4jsimple) + + // for JupyterCodegenTests and samples.api + testImplementation(project(":dataframe-csv")) } val samplesImplementation by configurations.getting { diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt index 7d43807d80..ddc5b0f2ab 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -12,7 +12,6 @@ import org.jetbrains.kotlinx.dataframe.impl.api.StringParser import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser 
-import org.jetbrains.kotlinx.dataframe.io.readCSV import org.jetbrains.kotlinx.dataframe.typeClass import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY @@ -27,7 +26,7 @@ import kotlin.reflect.KType * These options are used to configure how [DataColumns][DataColumn] of type [String] or [String?][String] * should be parsed. * You can always pass a [ParserOptions] object to functions that perform parsing, like [tryParse], [parse], - * or even [DataFrame.readCSV][DataFrame.Companion.readCSV] to override these options. + * or even [DataFrame.readCsv][DataFrame.Companion.readCsv] to override these options. */ public val DataFrame.Companion.parser: GlobalParserOptions get() = Parsers diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 2bb8ca07ec..1073fcc074 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -19,18 +19,35 @@ import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadCsvMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.impl.api.parse import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl +import org.jetbrains.kotlinx.dataframe.util.APACHE_CSV import org.jetbrains.kotlinx.dataframe.util.AS_URL import org.jetbrains.kotlinx.dataframe.util.AS_URL_IMPORT import org.jetbrains.kotlinx.dataframe.util.AS_URL_REPLACE import org.jetbrains.kotlinx.dataframe.util.DF_READ_NO_CSV import org.jetbrains.kotlinx.dataframe.util.DF_READ_NO_CSV_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_CSV +import org.jetbrains.kotlinx.dataframe.util.READ_CSV_FILE_OR_URL_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_CSV_FILE_REPLACE +import 
org.jetbrains.kotlinx.dataframe.util.READ_CSV_IMPORT +import org.jetbrains.kotlinx.dataframe.util.READ_CSV_STREAM_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_CSV_URL_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_DELIM +import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_READER_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_STREAM_REPLACE +import org.jetbrains.kotlinx.dataframe.util.TO_CSV +import org.jetbrains.kotlinx.dataframe.util.TO_CSV_IMPORT +import org.jetbrains.kotlinx.dataframe.util.TO_CSV_REPLACE +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_FILE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_IMPORT +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_PATH_REPLACE +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_WRITER_REPLACE import org.jetbrains.kotlinx.dataframe.values import java.io.BufferedInputStream import java.io.BufferedReader import java.io.File import java.io.FileInputStream import java.io.FileWriter -import java.io.IOException import java.io.InputStream import java.io.InputStreamReader import java.io.Reader @@ -46,6 +63,7 @@ import kotlin.reflect.KType import kotlin.reflect.typeOf import kotlin.time.Duration +@Deprecated(message = APACHE_CSV, level = DeprecationLevel.WARNING) public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat { override fun readDataFrame(stream: InputStream, header: List): AnyFrame = DataFrame.readCSV(stream = stream, delimiter = delimiter, header = header) @@ -57,7 +75,7 @@ public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat { override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough - override val testOrder: Int = 20000 + override val testOrder: Int = 20_001 override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod { val arguments = MethodArguments().add("delimiter", 
typeOf(), "'%L'", delimiter) @@ -65,6 +83,10 @@ public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat { } } +@Deprecated( + message = APACHE_CSV, + level = DeprecationLevel.WARNING, +) public enum class CSVType(public val format: CSVFormat) { DEFAULT( CSVFormat.DEFAULT.builder() @@ -81,12 +103,19 @@ public enum class CSVType(public val format: CSVFormat) { private val defaultCharset = Charsets.UTF_8 +@Deprecated("", level = DeprecationLevel.WARNING) internal fun isCompressed(fileOrUrl: String) = listOf("gz", "zip").contains(fileOrUrl.split(".").last()) +@Deprecated("", level = DeprecationLevel.WARNING) internal fun isCompressed(file: File) = listOf("gz", "zip").contains(file.extension) +@Deprecated("", level = DeprecationLevel.WARNING) internal fun isCompressed(url: URL) = isCompressed(url.path) +@Deprecated( + message = APACHE_CSV, + level = DeprecationLevel.HIDDEN, // clashes with the new readDelim +) @Refine @Interpretable("ReadDelimStr") public fun DataFrame.Companion.readDelimStr( @@ -106,7 +135,7 @@ public fun DataFrame.Companion.readDelimStr( @Deprecated( message = DF_READ_NO_CSV, - replaceWith = ReplaceWith(DF_READ_NO_CSV_REPLACE), + replaceWith = ReplaceWith(DF_READ_NO_CSV_REPLACE, READ_CSV_IMPORT), level = DeprecationLevel.ERROR, ) public fun DataFrame.Companion.read( @@ -118,22 +147,13 @@ public fun DataFrame.Companion.read( readLines: Int? 
= null, duplicate: Boolean = true, charset: Charset = Charsets.UTF_8, -): DataFrame<*> = - catchHttpResponse(asUrl(fileOrUrl)) { - readDelim( - it, - delimiter, - header, - isCompressed(fileOrUrl), - getCSVType(fileOrUrl), - colTypes, - skipLines, - readLines, - duplicate, - charset, - ) - } +): DataFrame<*> = error(DF_READ_NO_CSV) +@Deprecated( + message = READ_CSV, + replaceWith = ReplaceWith(READ_CSV_FILE_OR_URL_REPLACE, READ_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) @OptInRefine @Interpretable("ReadCSV0") public fun DataFrame.Companion.readCSV( @@ -163,6 +183,11 @@ public fun DataFrame.Companion.readCSV( ) } +@Deprecated( + message = READ_CSV, + replaceWith = ReplaceWith(READ_CSV_FILE_REPLACE, READ_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readCSV( file: File, delimiter: Char = ',', @@ -188,6 +213,11 @@ public fun DataFrame.Companion.readCSV( parserOptions, ) +@Deprecated( + message = READ_CSV, + replaceWith = ReplaceWith(READ_CSV_URL_REPLACE, READ_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readCSV( url: URL, delimiter: Char = ',', @@ -212,6 +242,11 @@ public fun DataFrame.Companion.readCSV( parserOptions, ) +@Deprecated( + message = READ_CSV, + replaceWith = ReplaceWith(READ_CSV_STREAM_REPLACE, READ_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readCSV( stream: InputStream, delimiter: Char = ',', @@ -238,13 +273,6 @@ public fun DataFrame.Companion.readCSV( parserOptions, ) -private fun getCSVType(path: String): CSVType = - when (path.substringAfterLast('.').lowercase()) { - "csv" -> CSVType.DEFAULT - "tdf" -> CSVType.TDF - else -> throw IOException("Unknown file format") - } - @Deprecated( message = AS_URL, replaceWith = ReplaceWith(AS_URL_REPLACE, AS_URL_IMPORT), @@ -264,6 +292,11 @@ private fun getFormat( .setAllowMissingColumnNames(duplicate) .build() +@Deprecated( + message = READ_DELIM, + replaceWith = 
ReplaceWith(READ_DELIM_STREAM_REPLACE), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readDelim( inStream: InputStream, delimiter: Char = ',', @@ -343,6 +376,11 @@ public fun ColType.toKType(): KType = ColType.Char -> typeOf() } +@Deprecated( + message = READ_DELIM, + replaceWith = ReplaceWith(READ_DELIM_READER_REPLACE), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readDelim( reader: Reader, format: CSVFormat = CSVFormat.DEFAULT.builder() @@ -371,12 +409,27 @@ public fun DataFrame.Companion.readDelim( ) } +@Deprecated( + message = WRITE_CSV, + replaceWith = ReplaceWith(WRITE_CSV_FILE_REPLACE, WRITE_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun AnyFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT): Unit = writeCSV(FileWriter(file), format) +@Deprecated( + message = WRITE_CSV, + replaceWith = ReplaceWith(WRITE_CSV_PATH_REPLACE, WRITE_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun AnyFrame.writeCSV(path: String, format: CSVFormat = CSVFormat.DEFAULT): Unit = writeCSV(FileWriter(path), format) +@Deprecated( + message = WRITE_CSV, + replaceWith = ReplaceWith(WRITE_CSV_WRITER_REPLACE, WRITE_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun AnyFrame.writeCSV(writer: Appendable, format: CSVFormat = CSVFormat.DEFAULT) { format.print(writer).use { printer -> if (!format.skipHeaderRecord) { @@ -395,6 +448,11 @@ public fun AnyFrame.writeCSV(writer: Appendable, format: CSVFormat = CSVFormat.D } } +@Deprecated( + message = TO_CSV, + replaceWith = ReplaceWith(TO_CSV_REPLACE, TO_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun AnyFrame.toCsv(format: CSVFormat = CSVFormat.DEFAULT): String = StringWriter().use { this.writeCSV(it, format) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index a507c218d5..d321d5f1dc 100644 --- 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -5,12 +5,23 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadTsvMethod +import org.jetbrains.kotlinx.dataframe.util.APACHE_CSV +import org.jetbrains.kotlinx.dataframe.util.READ_TSV +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_FILE_OR_URL_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_FILE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_IMPORT +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_STREAM_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_URL_REPLACE import java.io.File import java.io.FileInputStream import java.io.InputStream import java.net.URL import java.nio.charset.Charset +@Deprecated( + message = APACHE_CSV, + level = DeprecationLevel.WARNING, +) public class TSV : SupportedDataFrameFormat { override fun readDataFrame(stream: InputStream, header: List): AnyFrame = DataFrame.readTSV(stream, header = header) @@ -21,7 +32,7 @@ public class TSV : SupportedDataFrameFormat { override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough - override val testOrder: Int = 30000 + override val testOrder: Int = 30_001 override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod = DefaultReadTsvMethod(pathRepresentation) @@ -29,6 +40,11 @@ public class TSV : SupportedDataFrameFormat { private const val TAB_CHAR = '\t' +@Deprecated( + message = READ_TSV, + replaceWith = ReplaceWith(READ_TSV_FILE_OR_URL_REPLACE, READ_TSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readTSV( fileOrUrl: String, header: List = listOf(), @@ -55,6 +71,11 @@ public fun 
DataFrame.Companion.readTSV( ) } +@Deprecated( + message = READ_TSV, + replaceWith = ReplaceWith(READ_TSV_FILE_REPLACE, READ_TSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readTSV( file: File, header: List = listOf(), @@ -77,6 +98,11 @@ public fun DataFrame.Companion.readTSV( charset, ) +@Deprecated( + message = READ_TSV, + replaceWith = ReplaceWith(READ_TSV_URL_REPLACE, READ_TSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readTSV( url: URL, header: List = listOf(), @@ -99,6 +125,11 @@ public fun DataFrame.Companion.readTSV( parserOptions, ) +@Deprecated( + message = READ_TSV, + replaceWith = ReplaceWith(READ_TSV_STREAM_REPLACE, READ_TSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readTSV( stream: InputStream, header: List = listOf(), diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt index 12f1e15c2a..f1581e1154 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt @@ -157,8 +157,7 @@ internal class Integration(private val notebook: Notebook, private val options: override fun Builder.onLoaded() { if (version != null) { if (enableExperimentalCsv?.toBoolean() == true) { - println("Enabling experimental CSV module: dataframe-csv") - dependencies("org.jetbrains.kotlinx:dataframe-csv:$version") + println("CSV module is already enabled by default now.") } if (enableExperimentalGeo?.toBoolean() == true) { println("Enabling experimental Geo module: dataframe-geo") @@ -166,6 +165,7 @@ internal class Integration(private val notebook: Notebook, private val options: dependencies("org.jetbrains.kotlinx:dataframe-geo:$version") } dependencies( + 
"org.jetbrains.kotlinx:dataframe-csv:$version", "org.jetbrains.kotlinx:dataframe-excel:$version", "org.jetbrains.kotlinx:dataframe-jdbc:$version", "org.jetbrains.kotlinx:dataframe-arrow:$version", diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index b2e213e07b..a500535206 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -11,9 +11,9 @@ package org.jetbrains.kotlinx.dataframe.util private const val MESSAGE_0_16 = "Will be ERROR in 0.16." -internal const val DF_READ_NO_CSV = "This function is deprecated and should be replaced with `readCSV`. $MESSAGE_0_16" +internal const val DF_READ_NO_CSV = "This function is deprecated and should be replaced with `readCsv`. $MESSAGE_0_16" internal const val DF_READ_NO_CSV_REPLACE = - "this.readCSV(fileOrUrl, delimiter, header, colTypes, skipLines, readLines, duplicate, charset)" + "this.readCsv(fileOrUrl = fileOrUrl, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" internal const val CREATE_FRAME_COLUMN = "Removed from public API as this can likely better be solved by `DataFrame.chunked()`. Replaced by internal df.chunkedImpl(). $MESSAGE_0_16" @@ -66,6 +66,50 @@ internal const val MINUS_REPLACE = "this.remove(columns)" private const val MESSAGE_0_17 = "Will be ERROR in 0.17." +internal const val APACHE_CSV = + "The Apache-based CSV/TSV reader is deprecated in favor of the new Deephaven CSV reader in dataframe-csv. $MESSAGE_0_17" +internal const val READ_CSV = + "Apache-based readCSV() is deprecated in favor of Deephaven-based readCsv() in dataframe-csv. 
$MESSAGE_0_17" +internal const val READ_CSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.readCsv" +internal const val READ_CSV_FILE_OR_URL_REPLACE = + "this.readCsv(fileOrUrl = fileOrUrl, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_CSV_FILE_REPLACE = + "this.readCsv(file = file, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_CSV_URL_REPLACE = + "this.readCsv(url = url, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_CSV_STREAM_REPLACE = + "this.readCsv(inputStream = stream, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" + +internal const val READ_DELIM = + "Apache-based readDelim() is deprecated in favor of Deephaven-based readDelim() in dataframe-csv. $MESSAGE_0_17" +internal const val READ_DELIM_STREAM_REPLACE = + "this.readDelim(inputStream = inStream, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_DELIM_READER_REPLACE = + "this.readDelimStr(text = reader.readText(), delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" + +internal const val READ_TSV = + "Apache-based readTSV() is deprecated in favor of Deephaven-based readTsv() in dataframe-csv. 
$MESSAGE_0_17" +internal const val READ_TSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.readTsv" +internal const val READ_TSV_FILE_OR_URL_REPLACE = + "this.readTsv(fileOrUrl = fileOrUrl, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_TSV_FILE_REPLACE = + "this.readTsv(file = file, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_TSV_URL_REPLACE = + "this.readTsv(url = url, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_TSV_STREAM_REPLACE = + "this.readTsv(inputStream = stream, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" + +internal const val WRITE_CSV = + "The writeCSV() functions are deprecated in favor of writeCsv() in dataframe-csv. $MESSAGE_0_17" +internal const val WRITE_CSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.writeCsv" +internal const val WRITE_CSV_FILE_REPLACE = "this.writeCsv(file = file)" +internal const val WRITE_CSV_PATH_REPLACE = "this.writeCsv(path = path)" +internal const val WRITE_CSV_WRITER_REPLACE = "this.writeCsv(writer = writer)" + +internal const val TO_CSV = "toCsv() is deprecated in favor of toCsvStr() in dataframe-csv. 
$MESSAGE_0_17" +internal const val TO_CSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.toCsvStr" +internal const val TO_CSV_REPLACE = "this.toCsvStr()" + // endregion // region keep across releases diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt deleted file mode 100644 index 6a9d67a121..0000000000 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt +++ /dev/null @@ -1,366 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.io - -import io.kotest.assertions.throwables.shouldNotThrowAny -import io.kotest.matchers.nulls.shouldNotBeNull -import io.kotest.matchers.shouldBe -import kotlinx.datetime.LocalDateTime -import org.apache.commons.csv.CSVFormat -import org.intellij.lang.annotations.Language -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.api.ParserOptions -import org.jetbrains.kotlinx.dataframe.api.allNulls -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.groupBy -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.isEmpty -import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.toStr -import org.jetbrains.kotlinx.dataframe.ncol -import org.jetbrains.kotlinx.dataframe.nrow -import org.jetbrains.kotlinx.dataframe.testCsv -import org.jetbrains.kotlinx.dataframe.testResource -import org.junit.Test -import java.io.File -import java.io.StringReader -import java.io.StringWriter -import java.net.URL -import java.util.Locale -import kotlin.reflect.KClass -import kotlin.reflect.typeOf - -@Suppress("ktlint:standard:argument-list-wrapping") -class CsvTests { - - @Test - fun readNulls() { - val src = - """ - first,second - 2,, - 3,, 
- """.trimIndent() - val df = DataFrame.readDelimStr(src) - df.nrow shouldBe 2 - df.ncol shouldBe 2 - df["first"].type() shouldBe typeOf() - df["second"].allNulls() shouldBe true - df["second"].type() shouldBe typeOf() - } - - @Test - fun write() { - val df = dataFrameOf("col1", "col2")( - 1, null, - 2, null, - ).convert("col2").toStr() - - val str = StringWriter() - df.writeCSV(str) - - val res = DataFrame.readDelimStr(str.buffer.toString()) - - res shouldBe df - } - - @Test - fun readCSV() { - val df = DataFrame.read(simpleCsv) - - df.ncol shouldBe 11 - df.nrow shouldBe 5 - df.columnNames()[5] shouldBe "duplicate1" - df.columnNames()[6] shouldBe "duplicate11" - df["duplicate1"].type() shouldBe typeOf() - df["double"].type() shouldBe typeOf() - df["time"].type() shouldBe typeOf() - - println(df) - } - - @Test - fun readCsvWithFrenchLocaleAndAlternativeDelimiter() { - val df = DataFrame.readCSV( - url = csvWithFrenchLocale, - delimiter = ';', - parserOptions = ParserOptions(locale = Locale.FRENCH), - ) - - df.ncol shouldBe 11 - df.nrow shouldBe 5 - df.columnNames()[5] shouldBe "duplicate1" - df.columnNames()[6] shouldBe "duplicate11" - df["duplicate1"].type() shouldBe typeOf() - df["double"].type() shouldBe typeOf() - df["number"].type() shouldBe typeOf() - df["time"].type() shouldBe typeOf() - - println(df) - } - - @Test - fun readCsvWithFloats() { - val df = DataFrame.readCSV(wineCsv, delimiter = ';') - val schema = df.schema() - - fun assertColumnType(columnName: String, kClass: KClass<*>) { - val col = schema.columns[columnName] - col.shouldNotBeNull() - col.type.classifier shouldBe kClass - } - - assertColumnType("citric acid", Double::class) - assertColumnType("alcohol", Double::class) - assertColumnType("quality", Int::class) - } - - @Test - fun `read standard CSV with floats when user has alternative locale`() { - val currentLocale = Locale.getDefault() - try { - Locale.setDefault(Locale.forLanguageTag("ru-RU")) - val df = DataFrame.readCSV(wineCsv, 
delimiter = ';') - val schema = df.schema() - - fun assertColumnType(columnName: String, kClass: KClass<*>) { - val col = schema.columns[columnName] - col.shouldNotBeNull() - col.type.classifier shouldBe kClass - } - - assertColumnType("citric acid", Double::class) - assertColumnType("alcohol", Double::class) - assertColumnType("quality", Int::class) - } finally { - Locale.setDefault(currentLocale) - } - } - - @Test - fun `read with custom header`() { - val header = ('A'..'K').map { it.toString() } - val df = DataFrame.readCSV(simpleCsv, header = header, skipLines = 1) - df.columnNames() shouldBe header - df["B"].type() shouldBe typeOf() - - val headerShort = ('A'..'E').map { it.toString() } - val dfShort = DataFrame.readCSV(simpleCsv, header = headerShort, skipLines = 1) - dfShort.ncol shouldBe 5 - dfShort.columnNames() shouldBe headerShort - } - - @Test - fun `read first rows`() { - val expected = - listOf( - "untitled", - "user_id", - "name", - "duplicate", - "username", - "duplicate1", - "duplicate11", - "double", - "number", - "time", - "empty", - ) - val dfHeader = DataFrame.readCSV(simpleCsv, readLines = 0) - dfHeader.nrow shouldBe 0 - dfHeader.columnNames() shouldBe expected - - val dfThree = DataFrame.readCSV(simpleCsv, readLines = 3) - dfThree.nrow shouldBe 3 - - val dfFull = DataFrame.readCSV(simpleCsv, readLines = 10) - dfFull.nrow shouldBe 5 - } - - @Test - fun `if string starts with a number, it should be parsed as a string anyway`() { - @Language("CSV") - val df = DataFrame.readDelimStr( - """ - duration,floatDuration - 12 min,1.0 - 15,12.98 sec - 1 Season,0.9 parsec - """.trimIndent(), - ) - df["duration"].type() shouldBe typeOf() - df["floatDuration"].type() shouldBe typeOf() - } - - @Test - fun `if record has fewer columns than header then pad it with nulls`() { - val csvContent = - """ - col1,col2,col3 - 568,801,587 - 780,588 - """.trimIndent() - - val df = shouldNotThrowAny { - DataFrame.readDelimStr(csvContent) - } - - df shouldBe 
dataFrameOf("col1", "col2", "col3")( - 568, 801, 587, - 780, 588, null, - ) - } - - @Test - fun `write and read frame column`() { - val df = dataFrameOf("a", "b", "c")( - 1, 2, 3, - 1, 3, 2, - 2, 1, 3, - ) - val grouped = df.groupBy("a").into("g") - val str = grouped.toCsv() - val res = DataFrame.readDelimStr(str) - res shouldBe grouped - } - - @Test - fun `write and read column group`() { - val df = dataFrameOf("a", "b", "c")( - 1, 2, 3, - 1, 3, 2, - ) - val grouped = df.group("b", "c").into("d") - val str = grouped.toCsv() - val res = DataFrame.readDelimStr(str) - res shouldBe grouped - } - - @Test - fun `CSV String of saved dataframe starts with column name`() { - val df = dataFrameOf("a")(1) - df.toCsv().first() shouldBe 'a' - } - - @Test - fun `guess tsv`() { - val df = DataFrame.read(testResource("abc.tsv")) - df.columnsCount() shouldBe 3 - df.rowsCount() shouldBe 2 - } - - @Test - fun `write csv without header produce correct file`() { - val df = dataFrameOf("a", "b", "c")( - 1, 2, 3, - 1, 3, 2, - ) - df.writeCSV( - "src/test/resources/without_header.csv", - CSVFormat.DEFAULT.builder() - .setSkipHeaderRecord(true) - .build(), - ) - val producedFile = File("src/test/resources/without_header.csv") - producedFile.exists() shouldBe true - producedFile.readText() shouldBe "1,2,3\r\n1,3,2\r\n" - producedFile.delete() - } - - @Test - fun `check integrity of example data`() { - val df = DataFrame.readCSV("../data/jetbrains_repositories.csv") - df.columnNames() shouldBe listOf("full_name", "html_url", "stargazers_count", "topics", "watchers") - df.columnTypes() shouldBe - listOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) - df shouldBe DataFrame.readCSV("../data/jetbrains repositories.csv") - } - - @Test - fun `readDelimStr delimiter`() { - val tsv = - """ - a b c - 1 2 3 - """.trimIndent() - val df = DataFrame.readDelimStr(tsv, '\t') - df shouldBe dataFrameOf("a", "b", "c")(1, 2, 3) - } - - @Test - fun `file with BOM`() { - val df = 
DataFrame.readCSV(withBomCsv, delimiter = ';') - df.columnNames() shouldBe listOf("Column1", "Column2") - } - - @Test - fun `read empty delimStr or CSV`() { - val emptyDelimStr = DataFrame.readDelimStr("") - emptyDelimStr shouldBe DataFrame.empty() - - val emptyDelimFile = DataFrame.readDelim(File.createTempFile("empty", "csv").reader()) - emptyDelimFile shouldBe DataFrame.empty() - - val emptyCsvFile = DataFrame.readCSV(File.createTempFile("empty", "csv")) - emptyCsvFile shouldBe DataFrame.empty() - - val emptyCsvFileManualHeader = DataFrame.readCSV( - file = File.createTempFile("empty", "csv"), - header = listOf("a", "b", "c"), - ) - emptyCsvFileManualHeader.apply { - isEmpty() shouldBe true - columnNames() shouldBe listOf("a", "b", "c") - columnTypes() shouldBe listOf(typeOf(), typeOf(), typeOf()) - } - - val emptyCsvFileWithHeader = DataFrame.readCSV( - file = File.createTempFile("empty", "csv").also { it.writeText("a,b,c") }, - ) - emptyCsvFileWithHeader.apply { - isEmpty() shouldBe true - columnNames() shouldBe listOf("a", "b", "c") - columnTypes() shouldBe listOf(typeOf(), typeOf(), typeOf()) - } - - val emptyTsvStr = DataFrame.readTSV(File.createTempFile("empty", "tsv")) - emptyTsvStr shouldBe DataFrame.empty() - } - - // Issue #921 - @Test - fun `read csv with custom null strings and given type`() { - @Language("CSV") - val csv = - """ - a,b - noppes,2 - 1.2, - 3,45 - ,noppes - 1.3,1 - """.trimIndent() - - val df = DataFrame.readDelim( - reader = StringReader(csv), - parserOptions = ParserOptions( - nullStrings = setOf("noppes", ""), - ), - colTypes = mapOf("a" to ColType.Double, "b" to ColType.Int), - ) - df shouldBe dataFrameOf("a", "b")( - null, 2, - 1.2, null, - 3.0, 45, - null, null, - 1.3, 1, - ) - } - - companion object { - private val simpleCsv = testCsv("testCSV") - private val csvWithFrenchLocale = testCsv("testCSVwithFrenchLocale") - private val wineCsv = testCsv("wine") - private val withBomCsv = testCsv("with-bom") - } -} diff --git 
a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/OtherSamples.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/OtherSamples.kt index b598f9a089..7657b8b55b 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/OtherSamples.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/OtherSamples.kt @@ -5,7 +5,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.take import org.jetbrains.kotlinx.dataframe.explainer.WritersideFooter import org.jetbrains.kotlinx.dataframe.explainer.WritersideStyle -import org.jetbrains.kotlinx.dataframe.io.readCSV +import org.jetbrains.kotlinx.dataframe.io.read import org.jetbrains.kotlinx.dataframe.io.toStandaloneHTML import org.junit.Test import java.io.File @@ -16,7 +16,7 @@ class OtherSamples { @Test fun example() { - val df = DataFrame.readCSV("../data/titanic.csv", delimiter = ';').take(5) + val df = DataFrame.read("../data/movies.csv").take(5) // writeTable(df, "exampleName") } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt index 1eecc0475a..c625a8657d 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt @@ -127,8 +127,8 @@ class Schemas { @TransformDataFrameExpressions fun useInferredSchema() { // SampleStart - // Repository.readCSV() has argument 'path' with default value https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv - val df = Repository.readCSV() + // Repository.readCsv() has argument 'path' with default value 
https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv + val df = Repository.readCsv() // Use generated properties to access data in rows df.maxBy { stargazersCount }.print() // Or to access columns in dataframe. diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt index b68f234a80..39362a68c3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -12,7 +12,6 @@ import org.jetbrains.kotlinx.dataframe.impl.api.StringParser import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser -import org.jetbrains.kotlinx.dataframe.io.readCSV import org.jetbrains.kotlinx.dataframe.typeClass import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY @@ -27,7 +26,7 @@ import kotlin.reflect.KType * These options are used to configure how [DataColumns][DataColumn] of type [String] or [String?][String] * should be parsed. * You can always pass a [ParserOptions] object to functions that perform parsing, like [tryParse], [parse], - * or even [DataFrame.readCSV][DataFrame.Companion.readCSV] to override these options. + * or even [DataFrame.readCsv][DataFrame.Companion.readCsv] to override these options. 
*/ public val DataFrame.Companion.parser: GlobalParserOptions get() = Parsers diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt similarity index 64% rename from dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt index 5ab1734a91..aa0c874aad 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt @@ -9,15 +9,12 @@ import java.util.zip.InflaterInputStream import java.util.zip.ZipInputStream /** - * Compression algorithm to use when reading csv files. + * Compression algorithm to use when reading files. * We support [GZIP][Compression.Gzip] and [ZIP][Compression.Zip] compression out of the box. * - * Custom decompression algorithms can be added by creating an instance of [Custom]. - * - * @param wrapStream function that wraps any [InputStream] into a decompressing [InflaterInputStream] stream + * Custom decompression algorithms can be added by creating an instance of [Compression]. */ -public sealed class Compression(public open val wrapStream: (InputStream) -> I) { - +public fun interface Compression { public companion object { public fun of(fileOrUrl: String): Compression<*> = when (fileOrUrl.split(".").last()) { @@ -33,26 +30,25 @@ public sealed class Compression(public open val wrapStream: (In public fun of(url: URL): Compression<*> = of(url.path) } + /** Wraps any [InputStream] into a decompressing [InflaterInputStream] stream */ + public fun wrapStream(inputStream: InputStream): I + /** Can be overridden to perform some actions before reading from the input stream. 
*/ - public open fun doFirst(inputStream: I) {} + public fun doFirst(inputStream: I) {} /** * Can be overridden to perform some actions after reading from the input stream. * Remember to close the stream if you override this function. */ - public open fun doFinally(inputStream: I) { + public fun doFinally(inputStream: I) { inputStream.close() } - /** - * For .gz / GZIP files. - */ - public data object Gzip : Compression(wrapStream = ::GZIPInputStream) + /** For .gz / GZIP files */ + public data object Gzip : Compression by Compression(::GZIPInputStream) - /** - * For .zip / ZIP files. - */ - public data object Zip : Compression(wrapStream = ::ZipInputStream) { + /** For .zip / ZIP files */ + public data object Zip : Compression by Compression(::ZipInputStream) { override fun doFirst(inputStream: ZipInputStream) { // Make sure to call nextEntry once to prepare the stream @@ -69,21 +65,8 @@ public sealed class Compression(public open val wrapStream: (In } } - /** - * No compression. - */ - public data object None : Compression(wrapStream = { it }) - - /** - * Custom decompression algorithm. - * - * Can either be extended or instantiated directly with a custom [wrapStream] function. 
- * @param wrapStream function that wraps any [InputStream] into a decompressing [InputStream] - */ - public open class Custom(override val wrapStream: (InputStream) -> I) : - Compression(wrapStream = wrapStream) { - override fun toString(): String = "Compression.Custom(wrapStream = $wrapStream)" - } + /** No compression */ + public data object None : Compression by Compression({ it }) } /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 2bb8ca07ec..1073fcc074 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -19,18 +19,35 @@ import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadCsvMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.impl.api.parse import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl +import org.jetbrains.kotlinx.dataframe.util.APACHE_CSV import org.jetbrains.kotlinx.dataframe.util.AS_URL import org.jetbrains.kotlinx.dataframe.util.AS_URL_IMPORT import org.jetbrains.kotlinx.dataframe.util.AS_URL_REPLACE import org.jetbrains.kotlinx.dataframe.util.DF_READ_NO_CSV import org.jetbrains.kotlinx.dataframe.util.DF_READ_NO_CSV_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_CSV +import org.jetbrains.kotlinx.dataframe.util.READ_CSV_FILE_OR_URL_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_CSV_FILE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_CSV_IMPORT +import org.jetbrains.kotlinx.dataframe.util.READ_CSV_STREAM_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_CSV_URL_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_DELIM +import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_READER_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_STREAM_REPLACE +import org.jetbrains.kotlinx.dataframe.util.TO_CSV +import 
org.jetbrains.kotlinx.dataframe.util.TO_CSV_IMPORT +import org.jetbrains.kotlinx.dataframe.util.TO_CSV_REPLACE +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_FILE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_IMPORT +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_PATH_REPLACE +import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_WRITER_REPLACE import org.jetbrains.kotlinx.dataframe.values import java.io.BufferedInputStream import java.io.BufferedReader import java.io.File import java.io.FileInputStream import java.io.FileWriter -import java.io.IOException import java.io.InputStream import java.io.InputStreamReader import java.io.Reader @@ -46,6 +63,7 @@ import kotlin.reflect.KType import kotlin.reflect.typeOf import kotlin.time.Duration +@Deprecated(message = APACHE_CSV, level = DeprecationLevel.WARNING) public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat { override fun readDataFrame(stream: InputStream, header: List): AnyFrame = DataFrame.readCSV(stream = stream, delimiter = delimiter, header = header) @@ -57,7 +75,7 @@ public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat { override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough - override val testOrder: Int = 20000 + override val testOrder: Int = 20_001 override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod { val arguments = MethodArguments().add("delimiter", typeOf(), "'%L'", delimiter) @@ -65,6 +83,10 @@ public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat { } } +@Deprecated( + message = APACHE_CSV, + level = DeprecationLevel.WARNING, +) public enum class CSVType(public val format: CSVFormat) { DEFAULT( CSVFormat.DEFAULT.builder() @@ -81,12 +103,19 @@ public enum class CSVType(public val format: CSVFormat) { private val defaultCharset = Charsets.UTF_8 +@Deprecated("", level = 
DeprecationLevel.WARNING) internal fun isCompressed(fileOrUrl: String) = listOf("gz", "zip").contains(fileOrUrl.split(".").last()) +@Deprecated("", level = DeprecationLevel.WARNING) internal fun isCompressed(file: File) = listOf("gz", "zip").contains(file.extension) +@Deprecated("", level = DeprecationLevel.WARNING) internal fun isCompressed(url: URL) = isCompressed(url.path) +@Deprecated( + message = APACHE_CSV, + level = DeprecationLevel.HIDDEN, // clashes with the new readDelim +) @Refine @Interpretable("ReadDelimStr") public fun DataFrame.Companion.readDelimStr( @@ -106,7 +135,7 @@ public fun DataFrame.Companion.readDelimStr( @Deprecated( message = DF_READ_NO_CSV, - replaceWith = ReplaceWith(DF_READ_NO_CSV_REPLACE), + replaceWith = ReplaceWith(DF_READ_NO_CSV_REPLACE, READ_CSV_IMPORT), level = DeprecationLevel.ERROR, ) public fun DataFrame.Companion.read( @@ -118,22 +147,13 @@ public fun DataFrame.Companion.read( readLines: Int? = null, duplicate: Boolean = true, charset: Charset = Charsets.UTF_8, -): DataFrame<*> = - catchHttpResponse(asUrl(fileOrUrl)) { - readDelim( - it, - delimiter, - header, - isCompressed(fileOrUrl), - getCSVType(fileOrUrl), - colTypes, - skipLines, - readLines, - duplicate, - charset, - ) - } +): DataFrame<*> = error(DF_READ_NO_CSV) +@Deprecated( + message = READ_CSV, + replaceWith = ReplaceWith(READ_CSV_FILE_OR_URL_REPLACE, READ_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) @OptInRefine @Interpretable("ReadCSV0") public fun DataFrame.Companion.readCSV( @@ -163,6 +183,11 @@ public fun DataFrame.Companion.readCSV( ) } +@Deprecated( + message = READ_CSV, + replaceWith = ReplaceWith(READ_CSV_FILE_REPLACE, READ_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readCSV( file: File, delimiter: Char = ',', @@ -188,6 +213,11 @@ public fun DataFrame.Companion.readCSV( parserOptions, ) +@Deprecated( + message = READ_CSV, + replaceWith = ReplaceWith(READ_CSV_URL_REPLACE, READ_CSV_IMPORT), + level = 
DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readCSV( url: URL, delimiter: Char = ',', @@ -212,6 +242,11 @@ public fun DataFrame.Companion.readCSV( parserOptions, ) +@Deprecated( + message = READ_CSV, + replaceWith = ReplaceWith(READ_CSV_STREAM_REPLACE, READ_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readCSV( stream: InputStream, delimiter: Char = ',', @@ -238,13 +273,6 @@ public fun DataFrame.Companion.readCSV( parserOptions, ) -private fun getCSVType(path: String): CSVType = - when (path.substringAfterLast('.').lowercase()) { - "csv" -> CSVType.DEFAULT - "tdf" -> CSVType.TDF - else -> throw IOException("Unknown file format") - } - @Deprecated( message = AS_URL, replaceWith = ReplaceWith(AS_URL_REPLACE, AS_URL_IMPORT), @@ -264,6 +292,11 @@ private fun getFormat( .setAllowMissingColumnNames(duplicate) .build() +@Deprecated( + message = READ_DELIM, + replaceWith = ReplaceWith(READ_DELIM_STREAM_REPLACE), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readDelim( inStream: InputStream, delimiter: Char = ',', @@ -343,6 +376,11 @@ public fun ColType.toKType(): KType = ColType.Char -> typeOf() } +@Deprecated( + message = READ_DELIM, + replaceWith = ReplaceWith(READ_DELIM_READER_REPLACE), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readDelim( reader: Reader, format: CSVFormat = CSVFormat.DEFAULT.builder() @@ -371,12 +409,27 @@ public fun DataFrame.Companion.readDelim( ) } +@Deprecated( + message = WRITE_CSV, + replaceWith = ReplaceWith(WRITE_CSV_FILE_REPLACE, WRITE_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun AnyFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT): Unit = writeCSV(FileWriter(file), format) +@Deprecated( + message = WRITE_CSV, + replaceWith = ReplaceWith(WRITE_CSV_PATH_REPLACE, WRITE_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun AnyFrame.writeCSV(path: String, format: CSVFormat = CSVFormat.DEFAULT): Unit = 
writeCSV(FileWriter(path), format) +@Deprecated( + message = WRITE_CSV, + replaceWith = ReplaceWith(WRITE_CSV_WRITER_REPLACE, WRITE_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun AnyFrame.writeCSV(writer: Appendable, format: CSVFormat = CSVFormat.DEFAULT) { format.print(writer).use { printer -> if (!format.skipHeaderRecord) { @@ -395,6 +448,11 @@ public fun AnyFrame.writeCSV(writer: Appendable, format: CSVFormat = CSVFormat.D } } +@Deprecated( + message = TO_CSV, + replaceWith = ReplaceWith(TO_CSV_REPLACE, TO_CSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun AnyFrame.toCsv(format: CSVFormat = CSVFormat.DEFAULT): String = StringWriter().use { this.writeCSV(it, format) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index a507c218d5..d321d5f1dc 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -5,12 +5,23 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadTsvMethod +import org.jetbrains.kotlinx.dataframe.util.APACHE_CSV +import org.jetbrains.kotlinx.dataframe.util.READ_TSV +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_FILE_OR_URL_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_FILE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_IMPORT +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_STREAM_REPLACE +import org.jetbrains.kotlinx.dataframe.util.READ_TSV_URL_REPLACE import java.io.File import java.io.FileInputStream import java.io.InputStream import java.net.URL import java.nio.charset.Charset +@Deprecated( + message = APACHE_CSV, + level = DeprecationLevel.WARNING, +) public class TSV : SupportedDataFrameFormat { override fun 
readDataFrame(stream: InputStream, header: List): AnyFrame = DataFrame.readTSV(stream, header = header) @@ -21,7 +32,7 @@ public class TSV : SupportedDataFrameFormat { override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough - override val testOrder: Int = 30000 + override val testOrder: Int = 30_001 override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod = DefaultReadTsvMethod(pathRepresentation) @@ -29,6 +40,11 @@ public class TSV : SupportedDataFrameFormat { private const val TAB_CHAR = '\t' +@Deprecated( + message = READ_TSV, + replaceWith = ReplaceWith(READ_TSV_FILE_OR_URL_REPLACE, READ_TSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readTSV( fileOrUrl: String, header: List = listOf(), @@ -55,6 +71,11 @@ public fun DataFrame.Companion.readTSV( ) } +@Deprecated( + message = READ_TSV, + replaceWith = ReplaceWith(READ_TSV_FILE_REPLACE, READ_TSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readTSV( file: File, header: List = listOf(), @@ -77,6 +98,11 @@ public fun DataFrame.Companion.readTSV( charset, ) +@Deprecated( + message = READ_TSV, + replaceWith = ReplaceWith(READ_TSV_URL_REPLACE, READ_TSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readTSV( url: URL, header: List = listOf(), @@ -99,6 +125,11 @@ public fun DataFrame.Companion.readTSV( parserOptions, ) +@Deprecated( + message = READ_TSV, + replaceWith = ReplaceWith(READ_TSV_STREAM_REPLACE, READ_TSV_IMPORT), + level = DeprecationLevel.WARNING, +) public fun DataFrame.Companion.readTSV( stream: InputStream, header: List = listOf(), diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt index 12f1e15c2a..f1581e1154 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt +++ 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt @@ -157,8 +157,7 @@ internal class Integration(private val notebook: Notebook, private val options: override fun Builder.onLoaded() { if (version != null) { if (enableExperimentalCsv?.toBoolean() == true) { - println("Enabling experimental CSV module: dataframe-csv") - dependencies("org.jetbrains.kotlinx:dataframe-csv:$version") + println("CSV module is already enabled by default now.") } if (enableExperimentalGeo?.toBoolean() == true) { println("Enabling experimental Geo module: dataframe-geo") @@ -166,6 +165,7 @@ internal class Integration(private val notebook: Notebook, private val options: dependencies("org.jetbrains.kotlinx:dataframe-geo:$version") } dependencies( + "org.jetbrains.kotlinx:dataframe-csv:$version", "org.jetbrains.kotlinx:dataframe-excel:$version", "org.jetbrains.kotlinx:dataframe-jdbc:$version", "org.jetbrains.kotlinx:dataframe-arrow:$version", diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index b2e213e07b..a500535206 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -11,9 +11,9 @@ package org.jetbrains.kotlinx.dataframe.util private const val MESSAGE_0_16 = "Will be ERROR in 0.16." -internal const val DF_READ_NO_CSV = "This function is deprecated and should be replaced with `readCSV`. $MESSAGE_0_16" +internal const val DF_READ_NO_CSV = "This function is deprecated and should be replaced with `readCsv`. 
$MESSAGE_0_16" internal const val DF_READ_NO_CSV_REPLACE = - "this.readCSV(fileOrUrl, delimiter, header, colTypes, skipLines, readLines, duplicate, charset)" + "this.readCsv(fileOrUrl = fileOrUrl, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" internal const val CREATE_FRAME_COLUMN = "Removed from public API as this can likely better be solved by `DataFrame.chunked()`. Replaced by internal df.chunkedImpl(). $MESSAGE_0_16" @@ -66,6 +66,50 @@ internal const val MINUS_REPLACE = "this.remove(columns)" private const val MESSAGE_0_17 = "Will be ERROR in 0.17." +internal const val APACHE_CSV = + "The Apache-based CSV/TSV reader is deprecated in favor of the new Deephaven CSV reader in dataframe-csv. $MESSAGE_0_17" +internal const val READ_CSV = + "Apache-based readCSV() is deprecated in favor of Deephaven-based readCsv() in dataframe-csv. $MESSAGE_0_17" +internal const val READ_CSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.readCsv" +internal const val READ_CSV_FILE_OR_URL_REPLACE = + "this.readCsv(fileOrUrl = fileOrUrl, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_CSV_FILE_REPLACE = + "this.readCsv(file = file, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_CSV_URL_REPLACE = + "this.readCsv(url = url, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_CSV_STREAM_REPLACE = + "this.readCsv(inputStream = stream, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = 
readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" + +internal const val READ_DELIM = + "Apache-based readDelim() is deprecated in favor of Deephaven-based readDelim() in dataframe-csv. $MESSAGE_0_17" +internal const val READ_DELIM_STREAM_REPLACE = + "this.readDelim(inputStream = inStream, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_DELIM_READER_REPLACE = + "this.readDelimStr(text = reader.readText(), delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" + +internal const val READ_TSV = + "Apache-based readTSV() is deprecated in favor of Deephaven-based readTsv() in dataframe-csv. $MESSAGE_0_17" +internal const val READ_TSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.readTsv" +internal const val READ_TSV_FILE_OR_URL_REPLACE = + "this.readTsv(fileOrUrl = fileOrUrl, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_TSV_FILE_REPLACE = + "this.readTsv(file = file, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_TSV_URL_REPLACE = + "this.readTsv(url = url, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" +internal const val READ_TSV_STREAM_REPLACE = + "this.readTsv(inputStream = stream, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)" + +internal 
const val WRITE_CSV = + "The writeCSV() functions are deprecated in favor of writeCsv() in dataframe-csv. $MESSAGE_0_17" +internal const val WRITE_CSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.writeCsv" +internal const val WRITE_CSV_FILE_REPLACE = "this.writeCsv(file = file)" +internal const val WRITE_CSV_PATH_REPLACE = "this.writeCsv(path = path)" +internal const val WRITE_CSV_WRITER_REPLACE = "this.writeCsv(writer = writer)" + +internal const val TO_CSV = "toCsv() is deprecated in favor of toCsvStr() in dataframe-csv. $MESSAGE_0_17" +internal const val TO_CSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.toCsvStr" +internal const val TO_CSV_REPLACE = "this.toCsvStr()" + // endregion // region keep across releases diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt deleted file mode 100644 index 6a9d67a121..0000000000 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt +++ /dev/null @@ -1,366 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.io - -import io.kotest.assertions.throwables.shouldNotThrowAny -import io.kotest.matchers.nulls.shouldNotBeNull -import io.kotest.matchers.shouldBe -import kotlinx.datetime.LocalDateTime -import org.apache.commons.csv.CSVFormat -import org.intellij.lang.annotations.Language -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.api.ParserOptions -import org.jetbrains.kotlinx.dataframe.api.allNulls -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.groupBy -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.isEmpty -import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.toStr -import org.jetbrains.kotlinx.dataframe.ncol -import 
org.jetbrains.kotlinx.dataframe.nrow -import org.jetbrains.kotlinx.dataframe.testCsv -import org.jetbrains.kotlinx.dataframe.testResource -import org.junit.Test -import java.io.File -import java.io.StringReader -import java.io.StringWriter -import java.net.URL -import java.util.Locale -import kotlin.reflect.KClass -import kotlin.reflect.typeOf - -@Suppress("ktlint:standard:argument-list-wrapping") -class CsvTests { - - @Test - fun readNulls() { - val src = - """ - first,second - 2,, - 3,, - """.trimIndent() - val df = DataFrame.readDelimStr(src) - df.nrow shouldBe 2 - df.ncol shouldBe 2 - df["first"].type() shouldBe typeOf() - df["second"].allNulls() shouldBe true - df["second"].type() shouldBe typeOf() - } - - @Test - fun write() { - val df = dataFrameOf("col1", "col2")( - 1, null, - 2, null, - ).convert("col2").toStr() - - val str = StringWriter() - df.writeCSV(str) - - val res = DataFrame.readDelimStr(str.buffer.toString()) - - res shouldBe df - } - - @Test - fun readCSV() { - val df = DataFrame.read(simpleCsv) - - df.ncol shouldBe 11 - df.nrow shouldBe 5 - df.columnNames()[5] shouldBe "duplicate1" - df.columnNames()[6] shouldBe "duplicate11" - df["duplicate1"].type() shouldBe typeOf() - df["double"].type() shouldBe typeOf() - df["time"].type() shouldBe typeOf() - - println(df) - } - - @Test - fun readCsvWithFrenchLocaleAndAlternativeDelimiter() { - val df = DataFrame.readCSV( - url = csvWithFrenchLocale, - delimiter = ';', - parserOptions = ParserOptions(locale = Locale.FRENCH), - ) - - df.ncol shouldBe 11 - df.nrow shouldBe 5 - df.columnNames()[5] shouldBe "duplicate1" - df.columnNames()[6] shouldBe "duplicate11" - df["duplicate1"].type() shouldBe typeOf() - df["double"].type() shouldBe typeOf() - df["number"].type() shouldBe typeOf() - df["time"].type() shouldBe typeOf() - - println(df) - } - - @Test - fun readCsvWithFloats() { - val df = DataFrame.readCSV(wineCsv, delimiter = ';') - val schema = df.schema() - - fun assertColumnType(columnName: String, 
kClass: KClass<*>) { - val col = schema.columns[columnName] - col.shouldNotBeNull() - col.type.classifier shouldBe kClass - } - - assertColumnType("citric acid", Double::class) - assertColumnType("alcohol", Double::class) - assertColumnType("quality", Int::class) - } - - @Test - fun `read standard CSV with floats when user has alternative locale`() { - val currentLocale = Locale.getDefault() - try { - Locale.setDefault(Locale.forLanguageTag("ru-RU")) - val df = DataFrame.readCSV(wineCsv, delimiter = ';') - val schema = df.schema() - - fun assertColumnType(columnName: String, kClass: KClass<*>) { - val col = schema.columns[columnName] - col.shouldNotBeNull() - col.type.classifier shouldBe kClass - } - - assertColumnType("citric acid", Double::class) - assertColumnType("alcohol", Double::class) - assertColumnType("quality", Int::class) - } finally { - Locale.setDefault(currentLocale) - } - } - - @Test - fun `read with custom header`() { - val header = ('A'..'K').map { it.toString() } - val df = DataFrame.readCSV(simpleCsv, header = header, skipLines = 1) - df.columnNames() shouldBe header - df["B"].type() shouldBe typeOf() - - val headerShort = ('A'..'E').map { it.toString() } - val dfShort = DataFrame.readCSV(simpleCsv, header = headerShort, skipLines = 1) - dfShort.ncol shouldBe 5 - dfShort.columnNames() shouldBe headerShort - } - - @Test - fun `read first rows`() { - val expected = - listOf( - "untitled", - "user_id", - "name", - "duplicate", - "username", - "duplicate1", - "duplicate11", - "double", - "number", - "time", - "empty", - ) - val dfHeader = DataFrame.readCSV(simpleCsv, readLines = 0) - dfHeader.nrow shouldBe 0 - dfHeader.columnNames() shouldBe expected - - val dfThree = DataFrame.readCSV(simpleCsv, readLines = 3) - dfThree.nrow shouldBe 3 - - val dfFull = DataFrame.readCSV(simpleCsv, readLines = 10) - dfFull.nrow shouldBe 5 - } - - @Test - fun `if string starts with a number, it should be parsed as a string anyway`() { - @Language("CSV") - val df = 
DataFrame.readDelimStr( - """ - duration,floatDuration - 12 min,1.0 - 15,12.98 sec - 1 Season,0.9 parsec - """.trimIndent(), - ) - df["duration"].type() shouldBe typeOf() - df["floatDuration"].type() shouldBe typeOf() - } - - @Test - fun `if record has fewer columns than header then pad it with nulls`() { - val csvContent = - """ - col1,col2,col3 - 568,801,587 - 780,588 - """.trimIndent() - - val df = shouldNotThrowAny { - DataFrame.readDelimStr(csvContent) - } - - df shouldBe dataFrameOf("col1", "col2", "col3")( - 568, 801, 587, - 780, 588, null, - ) - } - - @Test - fun `write and read frame column`() { - val df = dataFrameOf("a", "b", "c")( - 1, 2, 3, - 1, 3, 2, - 2, 1, 3, - ) - val grouped = df.groupBy("a").into("g") - val str = grouped.toCsv() - val res = DataFrame.readDelimStr(str) - res shouldBe grouped - } - - @Test - fun `write and read column group`() { - val df = dataFrameOf("a", "b", "c")( - 1, 2, 3, - 1, 3, 2, - ) - val grouped = df.group("b", "c").into("d") - val str = grouped.toCsv() - val res = DataFrame.readDelimStr(str) - res shouldBe grouped - } - - @Test - fun `CSV String of saved dataframe starts with column name`() { - val df = dataFrameOf("a")(1) - df.toCsv().first() shouldBe 'a' - } - - @Test - fun `guess tsv`() { - val df = DataFrame.read(testResource("abc.tsv")) - df.columnsCount() shouldBe 3 - df.rowsCount() shouldBe 2 - } - - @Test - fun `write csv without header produce correct file`() { - val df = dataFrameOf("a", "b", "c")( - 1, 2, 3, - 1, 3, 2, - ) - df.writeCSV( - "src/test/resources/without_header.csv", - CSVFormat.DEFAULT.builder() - .setSkipHeaderRecord(true) - .build(), - ) - val producedFile = File("src/test/resources/without_header.csv") - producedFile.exists() shouldBe true - producedFile.readText() shouldBe "1,2,3\r\n1,3,2\r\n" - producedFile.delete() - } - - @Test - fun `check integrity of example data`() { - val df = DataFrame.readCSV("../data/jetbrains_repositories.csv") - df.columnNames() shouldBe listOf("full_name", 
"html_url", "stargazers_count", "topics", "watchers") - df.columnTypes() shouldBe - listOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) - df shouldBe DataFrame.readCSV("../data/jetbrains repositories.csv") - } - - @Test - fun `readDelimStr delimiter`() { - val tsv = - """ - a b c - 1 2 3 - """.trimIndent() - val df = DataFrame.readDelimStr(tsv, '\t') - df shouldBe dataFrameOf("a", "b", "c")(1, 2, 3) - } - - @Test - fun `file with BOM`() { - val df = DataFrame.readCSV(withBomCsv, delimiter = ';') - df.columnNames() shouldBe listOf("Column1", "Column2") - } - - @Test - fun `read empty delimStr or CSV`() { - val emptyDelimStr = DataFrame.readDelimStr("") - emptyDelimStr shouldBe DataFrame.empty() - - val emptyDelimFile = DataFrame.readDelim(File.createTempFile("empty", "csv").reader()) - emptyDelimFile shouldBe DataFrame.empty() - - val emptyCsvFile = DataFrame.readCSV(File.createTempFile("empty", "csv")) - emptyCsvFile shouldBe DataFrame.empty() - - val emptyCsvFileManualHeader = DataFrame.readCSV( - file = File.createTempFile("empty", "csv"), - header = listOf("a", "b", "c"), - ) - emptyCsvFileManualHeader.apply { - isEmpty() shouldBe true - columnNames() shouldBe listOf("a", "b", "c") - columnTypes() shouldBe listOf(typeOf(), typeOf(), typeOf()) - } - - val emptyCsvFileWithHeader = DataFrame.readCSV( - file = File.createTempFile("empty", "csv").also { it.writeText("a,b,c") }, - ) - emptyCsvFileWithHeader.apply { - isEmpty() shouldBe true - columnNames() shouldBe listOf("a", "b", "c") - columnTypes() shouldBe listOf(typeOf(), typeOf(), typeOf()) - } - - val emptyTsvStr = DataFrame.readTSV(File.createTempFile("empty", "tsv")) - emptyTsvStr shouldBe DataFrame.empty() - } - - // Issue #921 - @Test - fun `read csv with custom null strings and given type`() { - @Language("CSV") - val csv = - """ - a,b - noppes,2 - 1.2, - 3,45 - ,noppes - 1.3,1 - """.trimIndent() - - val df = DataFrame.readDelim( - reader = StringReader(csv), - parserOptions = ParserOptions( - 
nullStrings = setOf("noppes", ""), - ), - colTypes = mapOf("a" to ColType.Double, "b" to ColType.Int), - ) - df shouldBe dataFrameOf("a", "b")( - null, 2, - 1.2, null, - 3.0, 45, - null, null, - 1.3, 1, - ) - } - - companion object { - private val simpleCsv = testCsv("testCSV") - private val csvWithFrenchLocale = testCsv("testCSVwithFrenchLocale") - private val wineCsv = testCsv("wine") - private val withBomCsv = testCsv("with-bom") - } -} diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/OtherSamples.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/OtherSamples.kt index b598f9a089..7657b8b55b 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/OtherSamples.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/OtherSamples.kt @@ -5,7 +5,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.take import org.jetbrains.kotlinx.dataframe.explainer.WritersideFooter import org.jetbrains.kotlinx.dataframe.explainer.WritersideStyle -import org.jetbrains.kotlinx.dataframe.io.readCSV +import org.jetbrains.kotlinx.dataframe.io.read import org.jetbrains.kotlinx.dataframe.io.toStandaloneHTML import org.junit.Test import java.io.File @@ -16,7 +16,7 @@ class OtherSamples { @Test fun example() { - val df = DataFrame.readCSV("../data/titanic.csv", delimiter = ';').take(5) + val df = DataFrame.read("../data/movies.csv").take(5) // writeTable(df, "exampleName") } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt index 1eecc0475a..c625a8657d 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt @@ -127,8 +127,8 @@ class Schemas { @TransformDataFrameExpressions fun useInferredSchema() { // SampleStart - // 
Repository.readCSV() has argument 'path' with default value https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv - val df = Repository.readCSV() + // Repository.readCsv() has argument 'path' with default value https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv + val df = Repository.readCsv() // Use generated properties to access data in rows df.maxBy { stargazersCount }.print() // Or to access columns in dataframe. diff --git a/dataframe-csv/api/dataframe-csv.api b/dataframe-csv/api/dataframe-csv.api index 6e467c27d4..6fca0490d6 100644 --- a/dataframe-csv/api/dataframe-csv.api +++ b/dataframe-csv/api/dataframe-csv.api @@ -1,53 +1,3 @@ -public abstract class org/jetbrains/kotlinx/dataframe/io/Compression { - public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Compression$Companion; - public synthetic fun (Lkotlin/jvm/functions/Function1;Lkotlin/jvm/internal/DefaultConstructorMarker;)V - public fun doFinally (Ljava/io/InputStream;)V - public fun doFirst (Ljava/io/InputStream;)V - public fun getWrapStream ()Lkotlin/jvm/functions/Function1; -} - -public final class org/jetbrains/kotlinx/dataframe/io/Compression$Companion { - public final fun of (Ljava/io/File;)Lorg/jetbrains/kotlinx/dataframe/io/Compression; - public final fun of (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/Compression; - public final fun of (Ljava/net/URL;)Lorg/jetbrains/kotlinx/dataframe/io/Compression; - public final fun of (Ljava/nio/file/Path;)Lorg/jetbrains/kotlinx/dataframe/io/Compression; -} - -public class org/jetbrains/kotlinx/dataframe/io/Compression$Custom : org/jetbrains/kotlinx/dataframe/io/Compression { - public fun (Lkotlin/jvm/functions/Function1;)V - public fun getWrapStream ()Lkotlin/jvm/functions/Function1; - public fun toString ()Ljava/lang/String; -} - -public final class org/jetbrains/kotlinx/dataframe/io/Compression$Gzip : org/jetbrains/kotlinx/dataframe/io/Compression { - 
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$Gzip; - public fun equals (Ljava/lang/Object;)Z - public fun hashCode ()I - public fun toString ()Ljava/lang/String; -} - -public final class org/jetbrains/kotlinx/dataframe/io/Compression$None : org/jetbrains/kotlinx/dataframe/io/Compression { - public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$None; - public fun equals (Ljava/lang/Object;)Z - public fun hashCode ()I - public fun toString ()Ljava/lang/String; -} - -public final class org/jetbrains/kotlinx/dataframe/io/Compression$Zip : org/jetbrains/kotlinx/dataframe/io/Compression { - public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$Zip; - public synthetic fun doFinally (Ljava/io/InputStream;)V - public fun doFinally (Ljava/util/zip/ZipInputStream;)V - public synthetic fun doFirst (Ljava/io/InputStream;)V - public fun doFirst (Ljava/util/zip/ZipInputStream;)V - public fun equals (Ljava/lang/Object;)Z - public fun hashCode ()I - public fun toString ()Ljava/lang/String; -} - -public final class org/jetbrains/kotlinx/dataframe/io/CompressionKt { - public static final fun useDecompressed (Ljava/io/InputStream;Lorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object; -} - public final class org/jetbrains/kotlinx/dataframe/io/CsvDeephaven : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat { public fun ()V public fun (C)V diff --git a/dataframe-csv/build.gradle.kts b/dataframe-csv/build.gradle.kts index 70367d0245..944ec89170 100644 --- a/dataframe-csv/build.gradle.kts +++ b/dataframe-csv/build.gradle.kts @@ -161,3 +161,14 @@ kotlinPublications { kotlin { explicitApi() } + +val instrumentedJars: Configuration by configurations.creating { + isCanBeConsumed = true + isCanBeResolved = false +} + +artifacts { + add("instrumentedJars", tasks.jar.get().archiveFile) { + builtBy(tasks.jar) + } +} diff --git 
a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt index f35c289a59..9a069fd714 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt @@ -4,7 +4,6 @@ import io.deephaven.csv.CsvSpecs import org.apache.commons.csv.CSVFormat import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.ParserOptions -import org.jetbrains.kotlinx.dataframe.api.parser import org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs @@ -84,7 +83,7 @@ internal object DelimParams { * Columns widths are determined by the header in the data (if present), or manually by setting * [fixedColumnWidths]. */ - val HAS_FIXED_WIDTH_COLUMNS: Boolean = false + const val HAS_FIXED_WIDTH_COLUMNS: Boolean = false /** * @param fixedColumnWidths The fixed column widths. Default: empty list. @@ -137,8 +136,6 @@ internal object DelimParams { * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. 
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt index 5c067ae151..96abeece05 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt @@ -45,7 +45,6 @@ import java.io.InputStream import java.math.BigDecimal import java.math.BigInteger import java.net.URL -import java.util.Locale import kotlin.reflect.KType import kotlin.reflect.full.withNullability import kotlin.reflect.typeOf @@ -62,7 +61,7 @@ import kotlin.time.Duration * (use [skipLines] if there's a header in the data). * If empty (default), the header will be read from the data. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type. * * If supplied for a certain column name (inferred from data or given by [header]), @@ -98,8 +97,6 @@ import kotlin.time.Duration * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. 
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -153,11 +150,7 @@ internal fun readDelimImpl( ): DataFrame<*> { // set up the csv specs val csvSpecs = with(CsvSpecs.builder()) { - // turn on fast double parser if not explicitly set regardless of the global parser options - @Suppress("NullableBooleanElvis") - val adjustedParserOptions = (parserOptions ?: ParserOptions()) - .copy(useFastDoubleParser = parserOptions?.useFastDoubleParser ?: true) - customDoubleParser(DataFrameCustomDoubleParser(adjustedParserOptions)) + customDoubleParser(DataFrameCustomDoubleParser(parserOptions)) // use the given nullStrings if provided, else take the global ones + some extras val nullStrings = parserOptions?.nullStrings ?: (DataFrame.parser.nulls + DEFAULT_DELIM_NULL_STRINGS) @@ -176,14 +169,7 @@ internal fun readDelimImpl( hasFixedWidthColumns(hasFixedWidthColumns) if (hasFixedWidthColumns && fixedColumnWidths.isNotEmpty()) fixedColumnWidths(fixedColumnWidths) skipLines(takeHeaderFromCsv = header.isEmpty(), skipLines = skipLines) - - // Deephaven's LocalDateTime parser is unconfigurable, so if the user provides a locale, pattern, or formatter - // that's not compatible, we must use our own parser for LocalDateTime and let Deephaven read them as Strings. 
- val useDeepHavenLocalDateTime = - (parserOptions?.locale ?: DataFrame.parser.locale) in setOf(Locale.ROOT, Locale.US, Locale.ENGLISH) && - parserOptions?.dateTimePattern == null && - parserOptions?.dateTimeFormatter == null - parsers(parserOptions, colTypes, useDeepHavenLocalDateTime) + parsers(parserOptions, colTypes) adjustCsvSpecs(this, this) }.build() @@ -316,7 +302,7 @@ private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Lo * Logic overview: * * - if no [colTypes] are given - * - let deephaven use all its [default parsers][Parsers.DEFAULT] + * - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME] * - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied * - if [colTypes] are supplied * - if [ColType.DEFAULT] is among the values @@ -324,33 +310,35 @@ private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Lo * - let deephaven use _only_ the parser given as [ColType.DEFAULT] type * - if [ColType.DEFAULT] is not among the values * - set the parser for each supplied column+coltype - * - let deephaven use all its [default parsers][Parsers.DEFAULT] + * - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME] * - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied * + * We will not use [Deephaven's DateTime parser][Parsers.DATETIME]. + * This is done to avoid different behavior compared to [DataFrame.parse]; + * Deephaven parses [Instant] as [LocalDateTime]. [Issue #1047](https://github.com/Kotlin/dataframe/issues/1047) + * * Note that `skipTypes` will never skip a type explicitly set by `colTypes`. * This is intended. 
*/ -private fun CsvSpecs.Builder.parsers( - parserOptions: ParserOptions?, - colTypes: Map, - useDeepHavenLocalDateTime: Boolean, -): CsvSpecs.Builder { +private fun CsvSpecs.Builder.parsers(parserOptions: ParserOptions?, colTypes: Map): CsvSpecs.Builder { for ((colName, colType) in colTypes) { if (colName == ColType.DEFAULT) continue - putParserForName(colName, colType.toCsvParser(useDeepHavenLocalDateTime)) + putParserForName(colName, colType.toCsvParser()) } + // BOOLEAN, INT, LONG, DOUBLE, CHAR, STRING + val defaultParsers = Parsers.DEFAULT - Parsers.DATETIME val skipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes val parsersToUse = when { ColType.DEFAULT in colTypes -> - listOf(colTypes[ColType.DEFAULT]!!.toCsvParser(useDeepHavenLocalDateTime)) + listOf(colTypes[ColType.DEFAULT]!!.toCsvParser(), Parsers.STRING) skipTypes.isNotEmpty() -> { val parsersToSkip = skipTypes - .mapNotNull { it.toColType().toCsvParserOrNull(useDeepHavenLocalDateTime) } - Parsers.DEFAULT.toSet() - parsersToSkip.toSet() + .mapNotNull { it.toColType().toCsvParserOrNull() } + defaultParsers.toSet() - parsersToSkip.toSet() } - else -> Parsers.DEFAULT // BOOLEAN, INT, LONG, DOUBLE, DATETIME, CHAR, STRING + else -> defaultParsers } parsers(parsersToUse) return this @@ -369,7 +357,7 @@ private fun CsvSpecs.Builder.header(header: List): CsvSpecs.Builder = * Converts a [ColType] to a [Parser] from the Deephaven CSV library. * If no direct [Parser] exists, it returns `null`. */ -internal fun ColType.toCsvParserOrNull(useDeepHavenLocalDateTime: Boolean): Parser<*>? = +internal fun ColType.toCsvParserOrNull(): Parser<*>? 
= when (this) { ColType.Int -> Parsers.INT ColType.Long -> Parsers.LONG @@ -377,7 +365,6 @@ internal fun ColType.toCsvParserOrNull(useDeepHavenLocalDateTime: Boolean): Pars ColType.Char -> Parsers.CHAR ColType.Boolean -> Parsers.BOOLEAN ColType.String -> Parsers.STRING - ColType.LocalDateTime -> if (useDeepHavenLocalDateTime) Parsers.DATETIME else null else -> null } @@ -385,8 +372,7 @@ internal fun ColType.toCsvParserOrNull(useDeepHavenLocalDateTime: Boolean): Pars * Converts a [ColType] to a [Parser] from the Deephaven CSV library. * If no direct [Parser] exists, it defaults to [Parsers.STRING] so that [DataFrame.parse] can handle it. */ -internal fun ColType.toCsvParser(useDeepHavenLocalDateTime: Boolean): Parser<*> = - toCsvParserOrNull(useDeepHavenLocalDateTime) ?: Parsers.STRING +internal fun ColType.toCsvParser(): Parser<*> = toCsvParserOrNull() ?: Parsers.STRING internal fun KType.toColType(): ColType = when (this.withNullability(false)) { diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 740abf10e4..d62bb53db2 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -12,17 +12,16 @@ import kotlin.reflect.typeOf public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat { override fun readDataFrame(stream: InputStream, header: List): DataFrame<*> = - DataFrame.readCsv(inputStream = stream, header = header) + DataFrame.readCsv(inputStream = stream, header = header, delimiter = delimiter) override fun readDataFrame(file: File, header: List): DataFrame<*> = - DataFrame.readCsv(file = file, header = header) + DataFrame.readCsv(file = file, header = header, delimiter = delimiter) override fun acceptsExtension(ext: 
String): Boolean = ext == "csv" override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough - // if the user adds the dataframe-csv module, this will override old CSV reading method in DataFrame.read() - override val testOrder: Int = CSV().testOrder - 1 + override val testOrder: Int = 20_000 override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod { val arguments = MethodArguments().add("delimiter", typeOf(), "'%L'", delimiter) diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsv.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsv.kt index 4541261e64..814baa5718 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsv.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsv.kt @@ -22,8 +22,6 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.READ_LINES import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.SKIP_LINES import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.TRIM_INSIDE_QUOTED import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl -import org.jetbrains.kotlinx.dataframe.io.Compression -import org.jetbrains.kotlinx.dataframe.io.Compression.Companion import java.io.File import java.io.FileInputStream import java.io.InputStream @@ -67,15 +65,12 @@ import kotlin.io.path.inputStream * * [DataFrame.readCsvStr][readCsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readCSV][org.jetbrains.kotlinx.dataframe.io.readCSV]`()` functions. - * They'll hopefully be faster and better._ - * - * @param path The file path to read. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param path The file path to read. 
Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -116,8 +111,6 @@ import kotlin.io.path.inputStream * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -223,15 +216,12 @@ public fun DataFrame.Companion.readCsv( * * [DataFrame.readCsvStr][readCsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readCSV][org.jetbrains.kotlinx.dataframe.io.readCSV]`()` functions. - * They'll hopefully be faster and better._ - * - * @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. 
* @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -272,8 +262,6 @@ public fun DataFrame.Companion.readCsv( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -379,15 +367,12 @@ public fun DataFrame.Companion.readCsv( * * [DataFrame.readCsvStr][readCsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readCSV][org.jetbrains.kotlinx.dataframe.io.readCSV]`()` functions. - * They'll hopefully be faster and better._ - * - * @param url The URL from which to fetch the data. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param url The URL from which to fetch the data. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. 
- * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -428,8 +413,6 @@ public fun DataFrame.Companion.readCsv( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -535,15 +518,12 @@ public fun DataFrame.Companion.readCsv( * * [DataFrame.readCsvStr][readCsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readCSV][org.jetbrains.kotlinx.dataframe.io.readCSV]`()` functions. - * They'll hopefully be faster and better._ - * - * @param fileOrUrl The file path or URL to read the data from. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param fileOrUrl The file path or URL to read the data from. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. 
+ * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -584,8 +564,6 @@ public fun DataFrame.Companion.readCsv( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -692,15 +670,12 @@ public fun DataFrame.Companion.readCsv( * * [DataFrame.readCsvStr][readCsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readCSV][org.jetbrains.kotlinx.dataframe.io.readCSV]`()` functions. - * They'll hopefully be faster and better._ - * * @param inputStream Represents the file to read. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -741,8 +716,6 @@ public fun DataFrame.Companion.readCsv( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. 
* * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsvStr.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsvStr.kt index 0d007d2b3c..023dd509b7 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsvStr.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsvStr.kt @@ -56,9 +56,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl * * [DataFrame.readCsvStr][readCsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readCSV][org.jetbrains.kotlinx.dataframe.io.readCSV]`()` functions. - * They'll hopefully be faster and better._ - * * @param text The raw data to read in the form of a [String]. * @param delimiter The field delimiter character. Default: ','. * @@ -103,8 +100,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. 
* - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelim.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelim.kt index f100a1d474..329ef00cb5 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelim.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelim.kt @@ -22,7 +22,6 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.READ_LINES import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.SKIP_LINES import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.TRIM_INSIDE_QUOTED import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl -import org.jetbrains.kotlinx.dataframe.io.Compression import java.io.File import java.io.FileInputStream import java.io.InputStream @@ -72,15 +71,12 @@ import kotlin.io.path.inputStream * * [DataFrame.readDelimStr][readDelimStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readDelim][org.jetbrains.kotlinx.dataframe.io.readDelim]`()` functions. - * They'll hopefully be faster and better._ - * - * @param path The file path to read. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param path The file path to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. 
* @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -121,8 +117,6 @@ import kotlin.io.path.inputStream * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -228,15 +222,12 @@ public fun DataFrame.Companion.readDelim( * * [DataFrame.readDelimStr][readDelimStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readDelim][org.jetbrains.kotlinx.dataframe.io.readDelim]`()` functions. - * They'll hopefully be faster and better._ - * - * @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. 
+ * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -277,8 +268,6 @@ public fun DataFrame.Companion.readDelim( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -384,15 +373,12 @@ public fun DataFrame.Companion.readDelim( * * [DataFrame.readDelimStr][readDelimStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readDelim][org.jetbrains.kotlinx.dataframe.io.readDelim]`()` functions. - * They'll hopefully be faster and better._ - * - * @param url The URL from which to fetch the data. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param url The URL from which to fetch the data. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. 
* * If non-empty, the data will be read with [header] as the column titles @@ -433,8 +419,6 @@ public fun DataFrame.Companion.readDelim( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -540,15 +524,12 @@ public fun DataFrame.Companion.readDelim( * * [DataFrame.readDelimStr][readDelimStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readDelim][org.jetbrains.kotlinx.dataframe.io.readDelim]`()` functions. - * They'll hopefully be faster and better._ - * - * @param fileOrUrl The file path or URL to read the data from. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param fileOrUrl The file path or URL to read the data from. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. 
* * If non-empty, the data will be read with [header] as the column titles @@ -589,8 +570,6 @@ public fun DataFrame.Companion.readDelim( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -697,15 +676,12 @@ public fun DataFrame.Companion.readDelim( * * [DataFrame.readDelimStr][readDelimStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readDelim][org.jetbrains.kotlinx.dataframe.io.readDelim]`()` functions. - * They'll hopefully be faster and better._ - * * @param inputStream Represents the file to read. * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -746,8 +722,6 @@ public fun DataFrame.Companion.readDelim( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. 
* - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelimStr.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelimStr.kt index da43ad2b90..c9fb3a4618 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelimStr.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelimStr.kt @@ -56,9 +56,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl * * [DataFrame.readDelimStr][readDelimStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readDelim][org.jetbrains.kotlinx.dataframe.io.readDelim]`()` functions. - * They'll hopefully be faster and better._ - * * @param text The raw data to read in the form of a [String]. * @param delimiter The field delimiter character. Default: ','. * @@ -103,8 +100,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. 
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsv.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsv.kt index 51102f6a19..0acbede3e1 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsv.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsv.kt @@ -22,7 +22,6 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.SKIP_LINES import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.TRIM_INSIDE_QUOTED import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.TSV_DELIMITER import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl -import org.jetbrains.kotlinx.dataframe.io.Compression import java.io.File import java.io.FileInputStream import java.io.InputStream @@ -66,15 +65,12 @@ import kotlin.io.path.inputStream * * [DataFrame.readTsvStr][readTsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readTSV][org.jetbrains.kotlinx.dataframe.io.readTSV]`()` functions. - * They'll hopefully be faster and better._ - * - * @param path The file path to read. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param path The file path to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: '\t'. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. 
+ * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -115,8 +111,6 @@ import kotlin.io.path.inputStream * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -222,15 +216,12 @@ public fun DataFrame.Companion.readTsv( * * [DataFrame.readTsvStr][readTsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readTSV][org.jetbrains.kotlinx.dataframe.io.readTSV]`()` functions. - * They'll hopefully be faster and better._ - * - * @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: '\t'. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. 
* * If non-empty, the data will be read with [header] as the column titles @@ -271,8 +262,6 @@ public fun DataFrame.Companion.readTsv( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -378,15 +367,12 @@ public fun DataFrame.Companion.readTsv( * * [DataFrame.readTsvStr][readTsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readTSV][org.jetbrains.kotlinx.dataframe.io.readTSV]`()` functions. - * They'll hopefully be faster and better._ - * - * @param url The URL from which to fetch the data. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param url The URL from which to fetch the data. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: '\t'. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. 
* * If non-empty, the data will be read with [header] as the column titles @@ -427,8 +413,6 @@ public fun DataFrame.Companion.readTsv( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -534,15 +518,12 @@ public fun DataFrame.Companion.readTsv( * * [DataFrame.readTsvStr][readTsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readTSV][org.jetbrains.kotlinx.dataframe.io.readTSV]`()` functions. - * They'll hopefully be faster and better._ - * - * @param fileOrUrl The file path or URL to read the data from. Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + * @param fileOrUrl The file path or URL to read the data from. Can also be compressed as `.gz` or `.zip`, see [Compression]. * @param delimiter The field delimiter character. Default: '\t'. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. 
* * If non-empty, the data will be read with [header] as the column titles @@ -583,8 +564,6 @@ public fun DataFrame.Companion.readTsv( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to @@ -691,15 +670,12 @@ public fun DataFrame.Companion.readTsv( * * [DataFrame.readTsvStr][readTsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readTSV][org.jetbrains.kotlinx.dataframe.io.readTSV]`()` functions. - * They'll hopefully be faster and better._ - * * @param inputStream Represents the file to read. * @param delimiter The field delimiter character. Default: '\t'. * * Ignored if [hasFixedWidthColumns] is `true`. * @param compression The compression of the data. - * Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url. + * Default: [Compression.None], unless detected otherwise from the input file or url. * @param header Optional column titles. Default: empty list. * * If non-empty, the data will be read with [header] as the column titles @@ -740,8 +716,6 @@ public fun DataFrame.Companion.readTsv( * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. 
* - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. * - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsvStr.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsvStr.kt index 92f3c84f0e..ad735f7130 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsvStr.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsvStr.kt @@ -56,9 +56,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl * * [DataFrame.readTsvStr][readTsvStr]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old [DataFrame.readTSV][org.jetbrains.kotlinx.dataframe.io.readTSV]`()` functions. - * They'll hopefully be faster and better._ - * * @param text The raw data to read in the form of a [String]. * @param delimiter The field delimiter character. Default: '\t'. * @@ -103,8 +100,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl * ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. * * The only exceptions are: - * - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, - * regardless of the global setting. * - [nullStrings][ParserOptions.nullStrings], which, if `null`, * will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. 
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toCsvStr.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toCsvStr.kt index 23541c1e13..16e8a81155 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toCsvStr.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toCsvStr.kt @@ -35,10 +35,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl * * [DataFrame.toCsvStr][toCsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toDelimStr.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toDelimStr.kt index 90a731a593..82d25f7749 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toDelimStr.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toDelimStr.kt @@ -35,10 +35,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl * * [DataFrame.toDelimStr][toDelimStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param delimiter The field delimiter character. Default: ','. * * Ignored if [hasFixedWidthColumns] is `true`. 
diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toTsvStr.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toTsvStr.kt index d5d1867cc6..6e1430a0f0 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toTsvStr.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/toTsvStr.kt @@ -35,10 +35,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl * * [DataFrame.toTsvStr][toTsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param delimiter The field delimiter character. Default: '\t'. * * Ignored if [hasFixedWidthColumns] is `true`. diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index 4d349aad72..a70136d7f4 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -12,17 +12,16 @@ import kotlin.reflect.typeOf public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITER) : SupportedDataFrameFormat { override fun readDataFrame(stream: InputStream, header: List): DataFrame<*> = - DataFrame.readTsv(inputStream = stream, header = header) + DataFrame.readTsv(inputStream = stream, header = header, delimiter = delimiter) override fun readDataFrame(file: File, header: List): DataFrame<*> = - DataFrame.readTsv(file = file, header = header) + DataFrame.readTsv(file = file, header = header, delimiter = delimiter) override fun acceptsExtension(ext: String): Boolean = ext == "tsv" override fun acceptsSample(sample: 
SupportedFormatSample): Boolean = true // Extension is enough - // if the user adds the dataframe-csv module, this will override old TSV reading method in DataFrame.read() - override val testOrder: Int = TSV().testOrder - 1 + override val testOrder: Int = 30_000 override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod { val arguments = MethodArguments().add("delimiter", typeOf(), "'%L'", delimiter) diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeCsv.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeCsv.kt index 1ffb25500f..4ce4f5518a 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeCsv.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeCsv.kt @@ -41,10 +41,6 @@ import kotlin.io.path.writer * * [DataFrame.toCsvStr][toCsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param path The path pointing to a file to write to. * @param delimiter The field delimiter character. Default: ','. * @@ -114,10 +110,6 @@ public fun AnyFrame.writeCsv( * * [DataFrame.toCsvStr][toCsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param file The file to write to. * @param delimiter The field delimiter character. Default: ','. * @@ -187,10 +179,6 @@ public fun AnyFrame.writeCsv( * * [DataFrame.toCsvStr][toCsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. 
- * They'll hopefully be better._ - * * @param path The path pointing to a file to write to. * @param delimiter The field delimiter character. Default: ','. * @@ -261,10 +249,6 @@ public fun AnyFrame.writeCsv( * * [DataFrame.toCsvStr][toCsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param writer The [Appendable] to write to. * @param delimiter The field delimiter character. Default: ','. * diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeDelim.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeDelim.kt index 9da5db2281..01a5e5c2e9 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeDelim.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeDelim.kt @@ -41,10 +41,6 @@ import kotlin.io.path.writer * * [DataFrame.toDelimStr][toDelimStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param path The path pointing to a file to write to. * @param delimiter The field delimiter character. Default: ','. * @@ -114,10 +110,6 @@ public fun AnyFrame.writeDelim( * * [DataFrame.toDelimStr][toDelimStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param file The file to write to. * @param delimiter The field delimiter character. Default: ','. 
* @@ -187,10 +179,6 @@ public fun AnyFrame.writeDelim( * * [DataFrame.toDelimStr][toDelimStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param path The path pointing to a file to write to. * @param delimiter The field delimiter character. Default: ','. * @@ -261,10 +249,6 @@ public fun AnyFrame.writeDelim( * * [DataFrame.toDelimStr][toDelimStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param writer The [Appendable] to write to. * @param delimiter The field delimiter character. Default: ','. * diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeTsv.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeTsv.kt index c94f6783d8..0d3f507b97 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeTsv.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeTsv.kt @@ -41,10 +41,6 @@ import kotlin.io.path.writer * * [DataFrame.toTsvStr][toTsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param path The path pointing to a file to write to. * @param delimiter The field delimiter character. Default: '\t'. * @@ -114,10 +110,6 @@ public fun AnyFrame.writeTsv( * * [DataFrame.toTsvStr][toTsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. 
- * They'll hopefully be better._ - * * @param file The file to write to. * @param delimiter The field delimiter character. Default: '\t'. * @@ -187,10 +179,6 @@ public fun AnyFrame.writeTsv( * * [DataFrame.toTsvStr][toTsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param path The path pointing to a file to write to. * @param delimiter The field delimiter character. Default: '\t'. * @@ -261,10 +249,6 @@ public fun AnyFrame.writeTsv( * * [DataFrame.toTsvStr][toTsvStr]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. - * They'll hopefully be better._ - * * @param writer The [Appendable] to write to. * @param delimiter The field delimiter character. Default: '\t'. * diff --git a/dataframe-csv/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt b/dataframe-csv/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt index f845f521b0..a5903eab10 100644 --- a/dataframe-csv/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt +++ b/dataframe-csv/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt @@ -1,10 +1,12 @@ package org.jetbrains.kotlinx.dataframe.io +import io.deephaven.csv.parsers.Parsers import io.kotest.assertions.throwables.shouldNotThrowAny import io.kotest.assertions.throwables.shouldThrow import io.kotest.matchers.collections.shouldContainInOrder import io.kotest.matchers.nulls.shouldNotBeNull import io.kotest.matchers.shouldBe +import kotlinx.datetime.Instant import kotlinx.datetime.LocalDate import kotlinx.datetime.LocalDateTime import org.intellij.lang.annotations.Language @@ -120,7 +122,7 @@ class 
DelimCsvTsvTests { fun `read custom compression Csv`() { DataFrame.readCsv( simpleCsvGz, - compression = Compression.Custom { GZIPInputStream(it) }, + compression = Compression(::GZIPInputStream), ) shouldBe DataFrame.readCsv(simpleCsv) } @@ -315,15 +317,15 @@ class DelimCsvTsvTests { fun `check integrity of example data`() { shouldThrow { // cannot read file with blank line at the start - DataFrame.readCsv("../data/jetbrains_repositories.csv") + DataFrame.readCsv("../data/jetbrains repositories.csv") } shouldThrow { // ignoreEmptyLines only ignores intermediate empty lines - DataFrame.readCsv("../data/jetbrains_repositories.csv", ignoreEmptyLines = true) + DataFrame.readCsv("../data/jetbrains repositories.csv", ignoreEmptyLines = true) } val df = DataFrame.readCsv( - "../data/jetbrains_repositories.csv", + "../data/jetbrains repositories.csv", skipLines = 1, // we need to skip the empty lines manually ) df.columnNames() shouldBe listOf("full_name", "html_url", "stargazers_count", "topics", "watchers") @@ -334,10 +336,8 @@ class DelimCsvTsvTests { typeOf(), typeOf(), ) - df shouldBe DataFrame.readCsv( - "../data/jetbrains repositories.csv", - skipLines = 1, - ) + // same file without empty line at the beginning + df shouldBe DataFrame.readCsv("../data/jetbrains_repositories.csv") } @Test @@ -750,6 +750,44 @@ class DelimCsvTsvTests { DataFrame.parser.resetToDefault() } + // Issue #1047 + @Test + fun `Only use Deephaven datetime parser with custom csv specs`() { + @Language("csv") + val csvContent = + """ + with_timezone_offset,without_timezone_offset + 2024-12-12T13:00:00+01:00,2024-12-12T13:00:00 + """.trimIndent() + + // use DFs parsers by default for datetime-like columns + val df1 = DataFrame.readCsvStr(csvContent) + df1["with_timezone_offset"].let { + it.type() shouldBe typeOf() + it[0] shouldBe Instant.parse("2024-12-12T13:00:00+01:00") + } + df1["without_timezone_offset"].let { + it.type() shouldBe typeOf() + it[0] shouldBe 
LocalDateTime.parse("2024-12-12T13:00:00") + } + + // enable fast datetime parser for the first column with adjustCsvSpecs + val df2 = DataFrame.readCsv( + inputStream = csvContent.byteInputStream(), + adjustCsvSpecs = { + putParserForName("with_timezone_offset", Parsers.DATETIME) + }, + ) + df2["with_timezone_offset"].let { + it.type() shouldBe typeOf() + it[0] shouldBe LocalDateTime.parse("2024-12-12T12:00:00") + } + df2["without_timezone_offset"].let { + it.type() shouldBe typeOf() + it[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00") + } + } + companion object { private val irisDataset = testCsv("irisDataset") private val simpleCsv = testCsv("testCSV") diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonReadDelimDocs.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonReadDelimDocs.kt index d8cd380ad9..f2b11109d2 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonReadDelimDocs.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonReadDelimDocs.kt @@ -44,9 +44,6 @@ import java.net.URL * * $[STR_FUNCTION_LINK]`("a,b,c", delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old $[OLD_FUNCTION_LINK]`()` functions. 
- * They'll hopefully be faster and better._ - * * @comment Some helper arguments for the function links * @set [FUNCTION_LINK] \[DataFrame.${[FUNCTION_NAME]}\]\[${[FUNCTION_NAME]}\] * @set [STR_FUNCTION_LINK] \[DataFrame.${[FUNCTION_NAME]}Str\]\[${[FUNCTION_NAME]}Str\] diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonWriteDelimDocs.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonWriteDelimDocs.kt index fa85ebf5e6..0c8cade34e 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonWriteDelimDocs.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/CommonWriteDelimDocs.kt @@ -4,8 +4,6 @@ package org.jetbrains.kotlinx.dataframe.documentation import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL -import org.jetbrains.kotlinx.dataframe.io.toCsv -import org.jetbrains.kotlinx.dataframe.io.writeCSV import java.io.File /** @@ -31,10 +29,6 @@ import java.io.File * * $[TO_STR_FUNCTION_LINK]`(delimiter = ",")` * - * _**NOTE EXPERIMENTAL**: This is a new set of functions, replacing the old - * [DataFrame.writeCSV][writeCSV]`()` and [DataFrame.toCsv][toCsv]`()` functions. 
- * They'll hopefully be better._ - * * @comment Some helper arguments for the function links * @set [FUNCTION_LINK] \[DataFrame.${[FUNCTION_NAME]}\]\[${[FUNCTION_NAME]}\] * @set [TO_STR_FUNCTION_LINK] \[DataFrame.${[TO_STR_FUNCTION_NAME]}\]\[${[TO_STR_FUNCTION_NAME]}\] diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt index c0f39c79c3..93bdcb6d5c 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt @@ -4,7 +4,6 @@ import io.deephaven.csv.CsvSpecs import org.apache.commons.csv.CSVFormat import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.ParserOptions -import org.jetbrains.kotlinx.dataframe.api.parser import org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs @@ -19,16 +18,28 @@ import org.jetbrains.kotlinx.dataframe.io.QuoteMode @Suppress("ktlint:standard:class-naming", "ClassName", "KDocUnresolvedReference") internal object DelimParams { - /** @param path The file path to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. */ + /** + * @param path The file path to read. + * Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + */ interface PATH_READ - /** @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. */ + /** + * @param file The file to read. + * Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + */ interface FILE_READ - /** @param url The URL from which to fetch the data. 
Can also be compressed as `.gz` or `.zip`, see [Compression]. */ + /** + * @param url The URL from which to fetch the data. + * Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + */ interface URL_READ - /** @param fileOrUrl The file path or URL to read the data from. Can also be compressed as `.gz` or `.zip`, see [Compression]. */ + /** + * @param fileOrUrl The file path or URL to read the data from. + * Can also be compressed as `.gz` or `.zip`, see [Compression][org.jetbrains.kotlinx.dataframe.io.Compression]. + */ interface FILE_OR_URL_READ /** @param inputStream Represents the file to read. */ diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt index dec844836c..cc4b9526fb 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt @@ -62,7 +62,6 @@ import java.io.InputStream import java.math.BigDecimal import java.math.BigInteger import java.net.URL -import java.util.Locale import kotlin.reflect.KType import kotlin.reflect.full.withNullability import kotlin.reflect.typeOf @@ -131,14 +130,7 @@ internal fun readDelimImpl( hasFixedWidthColumns(hasFixedWidthColumns) if (hasFixedWidthColumns && fixedColumnWidths.isNotEmpty()) fixedColumnWidths(fixedColumnWidths) skipLines(takeHeaderFromCsv = header.isEmpty(), skipLines = skipLines) - - // Deephaven's LocalDateTime parser is unconfigurable, so if the user provides a locale, pattern, or formatter - // that's not compatible, we must use our own parser for LocalDateTime and let Deephaven read them as Strings. 
- val useDeepHavenLocalDateTime = - (parserOptions?.locale ?: DataFrame.parser.locale) in setOf(Locale.ROOT, Locale.US, Locale.ENGLISH) && - parserOptions?.dateTimePattern == null && - parserOptions?.dateTimeFormatter == null - parsers(parserOptions, colTypes, useDeepHavenLocalDateTime) + parsers(parserOptions, colTypes) adjustCsvSpecs(this, this) }.build() @@ -271,7 +263,7 @@ private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Lo * Logic overview: * * - if no [colTypes] are given - * - let deephaven use all its [default parsers][Parsers.DEFAULT] + * - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME] * - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied * - if [colTypes] are supplied * - if [ColType.DEFAULT] is among the values @@ -279,33 +271,35 @@ private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Lo * - let deephaven use _only_ the parser given as [ColType.DEFAULT] type * - if [ColType.DEFAULT] is not among the values * - set the parser for each supplied column+coltype - * - let deephaven use all its [default parsers][Parsers.DEFAULT] + * - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME] * - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied * + * We will not use [Deephaven's DateTime parser][Parsers.DATETIME]. + * This is done to avoid different behavior compared to [DataFrame.parse]; + * Deephaven parses [Instant] as [LocalDateTime]. [Issue #1047](https://github.com/Kotlin/dataframe/issues/1047) + * * Note that `skipTypes` will never skip a type explicitly set by `colTypes`. * This is intended. 
*/ -private fun CsvSpecs.Builder.parsers( - parserOptions: ParserOptions?, - colTypes: Map, - useDeepHavenLocalDateTime: Boolean, -): CsvSpecs.Builder { +private fun CsvSpecs.Builder.parsers(parserOptions: ParserOptions?, colTypes: Map): CsvSpecs.Builder { for ((colName, colType) in colTypes) { if (colName == ColType.DEFAULT) continue - putParserForName(colName, colType.toCsvParser(useDeepHavenLocalDateTime)) + putParserForName(colName, colType.toCsvParser()) } + // BOOLEAN, INT, LONG, DOUBLE, CHAR, STRING + val defaultParsers = Parsers.DEFAULT - Parsers.DATETIME val skipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes val parsersToUse = when { ColType.DEFAULT in colTypes -> - listOf(colTypes[ColType.DEFAULT]!!.toCsvParser(useDeepHavenLocalDateTime)) + listOf(colTypes[ColType.DEFAULT]!!.toCsvParser(), Parsers.STRING) skipTypes.isNotEmpty() -> { val parsersToSkip = skipTypes - .mapNotNull { it.toColType().toCsvParserOrNull(useDeepHavenLocalDateTime) } - Parsers.DEFAULT.toSet() - parsersToSkip.toSet() + .mapNotNull { it.toColType().toCsvParserOrNull() } + defaultParsers.toSet() - parsersToSkip.toSet() } - else -> Parsers.DEFAULT // BOOLEAN, INT, LONG, DOUBLE, DATETIME, CHAR, STRING + else -> defaultParsers } parsers(parsersToUse) return this @@ -324,7 +318,7 @@ private fun CsvSpecs.Builder.header(header: List): CsvSpecs.Builder = * Converts a [ColType] to a [Parser] from the Deephaven CSV library. * If no direct [Parser] exists, it returns `null`. */ -internal fun ColType.toCsvParserOrNull(useDeepHavenLocalDateTime: Boolean): Parser<*>? = +internal fun ColType.toCsvParserOrNull(): Parser<*>? 
= when (this) { ColType.Int -> Parsers.INT ColType.Long -> Parsers.LONG @@ -332,7 +326,6 @@ internal fun ColType.toCsvParserOrNull(useDeepHavenLocalDateTime: Boolean): Pars ColType.Char -> Parsers.CHAR ColType.Boolean -> Parsers.BOOLEAN ColType.String -> Parsers.STRING - ColType.LocalDateTime -> if (useDeepHavenLocalDateTime) Parsers.DATETIME else null else -> null } @@ -340,8 +333,7 @@ internal fun ColType.toCsvParserOrNull(useDeepHavenLocalDateTime: Boolean): Pars * Converts a [ColType] to a [Parser] from the Deephaven CSV library. * If no direct [Parser] exists, it defaults to [Parsers.STRING] so that [DataFrame.parse] can handle it. */ -internal fun ColType.toCsvParser(useDeepHavenLocalDateTime: Boolean): Parser<*> = - toCsvParserOrNull(useDeepHavenLocalDateTime) ?: Parsers.STRING +internal fun ColType.toCsvParser(): Parser<*> = toCsvParserOrNull() ?: Parsers.STRING internal fun KType.toColType(): ColType = when (this.withNullability(false)) { diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt deleted file mode 100644 index 5ab1734a91..0000000000 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Compression.kt +++ /dev/null @@ -1,107 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.io - -import java.io.File -import java.io.InputStream -import java.net.URL -import java.nio.file.Path -import java.util.zip.GZIPInputStream -import java.util.zip.InflaterInputStream -import java.util.zip.ZipInputStream - -/** - * Compression algorithm to use when reading csv files. - * We support [GZIP][Compression.Gzip] and [ZIP][Compression.Zip] compression out of the box. - * - * Custom decompression algorithms can be added by creating an instance of [Custom]. 
- * - * @param wrapStream function that wraps any [InputStream] into a decompressing [InflaterInputStream] stream - */ -public sealed class Compression(public open val wrapStream: (InputStream) -> I) { - - public companion object { - public fun of(fileOrUrl: String): Compression<*> = - when (fileOrUrl.split(".").last()) { - "gz" -> Gzip - "zip" -> Zip - else -> None - } - - public fun of(file: File): Compression<*> = of(file.name) - - public fun of(path: Path): Compression<*> = of(path.fileName?.toString() ?: "") - - public fun of(url: URL): Compression<*> = of(url.path) - } - - /** Can be overridden to perform some actions before reading from the input stream. */ - public open fun doFirst(inputStream: I) {} - - /** - * Can be overridden to perform some actions after reading from the input stream. - * Remember to close the stream if you override this function. - */ - public open fun doFinally(inputStream: I) { - inputStream.close() - } - - /** - * For .gz / GZIP files. - */ - public data object Gzip : Compression(wrapStream = ::GZIPInputStream) - - /** - * For .zip / ZIP files. - */ - public data object Zip : Compression(wrapStream = ::ZipInputStream) { - - override fun doFirst(inputStream: ZipInputStream) { - // Make sure to call nextEntry once to prepare the stream - if (inputStream.nextEntry == null) error("No entries in zip file") - } - - override fun doFinally(inputStream: ZipInputStream) { - // Check we don't have more than one entry in the zip file - if (inputStream.nextEntry != null) { - inputStream.close() - throw IllegalArgumentException("Zip file contains more than one entry") - } - inputStream.close() - } - } - - /** - * No compression. - */ - public data object None : Compression(wrapStream = { it }) - - /** - * Custom decompression algorithm. - * - * Can either be extended or instantiated directly with a custom [wrapStream] function. 
- * @param wrapStream function that wraps any [InputStream] into a decompressing [InputStream] - */ - public open class Custom(override val wrapStream: (InputStream) -> I) : - Compression(wrapStream = wrapStream) { - override fun toString(): String = "Compression.Custom(wrapStream = $wrapStream)" - } -} - -/** - * Decompresses the input stream with the given compression algorithm. - * - * Also closes the stream after the block is executed. - */ -public inline fun InputStream.useDecompressed( - compression: Compression, - block: (InputStream) -> T, -): T { - // first wrap the stream by (optional) compression algorithm - val wrappedStream = compression.wrapStream(this) - compression.doFirst(wrappedStream) - - try { - return block(wrappedStream) - } finally { - compression.doFinally(wrappedStream) - } -} diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 740abf10e4..d62bb53db2 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -12,17 +12,16 @@ import kotlin.reflect.typeOf public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat { override fun readDataFrame(stream: InputStream, header: List): DataFrame<*> = - DataFrame.readCsv(inputStream = stream, header = header) + DataFrame.readCsv(inputStream = stream, header = header, delimiter = delimiter) override fun readDataFrame(file: File, header: List): DataFrame<*> = - DataFrame.readCsv(file = file, header = header) + DataFrame.readCsv(file = file, header = header, delimiter = delimiter) override fun acceptsExtension(ext: String): Boolean = ext == "csv" override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough - // if the user adds the dataframe-csv module, this will override old CSV reading method in 
DataFrame.read() - override val testOrder: Int = CSV().testOrder - 1 + override val testOrder: Int = 20_000 override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod { val arguments = MethodArguments().add("delimiter", typeOf(), "'%L'", delimiter) diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index 4d349aad72..a70136d7f4 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -12,17 +12,16 @@ import kotlin.reflect.typeOf public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITER) : SupportedDataFrameFormat { override fun readDataFrame(stream: InputStream, header: List): DataFrame<*> = - DataFrame.readTsv(inputStream = stream, header = header) + DataFrame.readTsv(inputStream = stream, header = header, delimiter = delimiter) override fun readDataFrame(file: File, header: List): DataFrame<*> = - DataFrame.readTsv(file = file, header = header) + DataFrame.readTsv(file = file, header = header, delimiter = delimiter) override fun acceptsExtension(ext: String): Boolean = ext == "tsv" override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough - // if the user adds the dataframe-csv module, this will override old TSV reading method in DataFrame.read() - override val testOrder: Int = TSV().testOrder - 1 + override val testOrder: Int = 30_000 override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod { val arguments = MethodArguments().add("delimiter", typeOf(), "'%L'", delimiter) diff --git a/dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt b/dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt index a8171575e0..31abcfd041 100644 --- 
a/dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt +++ b/dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt @@ -1,10 +1,12 @@ package org.jetbrains.kotlinx.dataframe.io +import io.deephaven.csv.parsers.Parsers import io.kotest.assertions.throwables.shouldNotThrowAny import io.kotest.assertions.throwables.shouldThrow import io.kotest.matchers.collections.shouldContainInOrder import io.kotest.matchers.nulls.shouldNotBeNull import io.kotest.matchers.shouldBe +import kotlinx.datetime.Instant import kotlinx.datetime.LocalDate import kotlinx.datetime.LocalDateTime import org.intellij.lang.annotations.Language @@ -120,7 +122,7 @@ class DelimCsvTsvTests { fun `read custom compression Csv`() { DataFrame.readCsv( simpleCsvGz, - compression = Compression.Custom { GZIPInputStream(it) }, + compression = Compression(::GZIPInputStream), ) shouldBe DataFrame.readCsv(simpleCsv) } @@ -516,29 +518,32 @@ class DelimCsvTsvTests { dutchDf["price"].type() shouldBe typeOf() - // while negative numbers in RTL languages cannot be parsed, thanks to Java, others work - @Language("csv") - val arabicCsv = - """ - الاسم; السعر; - أ;١٢٫٤٥; - ب;١٣٫٣٥; - ج;١٠٠٫١٢٣; - د;٢٠٤٫٢٣٥; - هـ;ليس رقم; - و;null; - """.trimIndent() - - val easternArabicDf = DataFrame.readCsvStr( - arabicCsv, - delimiter = ';', - parserOptions = ParserOptions( - locale = Locale.forLanguageTag("ar-001"), - ), - ) + // skipping this test on windows due to lack of support for Arabic locales + if (!System.getProperty("os.name").startsWith("Windows")) { + // while negative numbers in RTL languages cannot be parsed thanks to Java, others work + @Language("csv") + val arabicCsv = + """ + الاسم; السعر; + أ;١٢٫٤٥; + ب;١٣٫٣٥; + ج;١٠٠٫١٢٣; + د;٢٠٤٫٢٣٥; + هـ;ليس رقم; + و;null; + """.trimIndent() + + val easternArabicDf = DataFrame.readCsvStr( + arabicCsv, + delimiter = ';', + parserOptions = ParserOptions( + locale = Locale.forLanguageTag("ar-001"), + ), + ) - 
easternArabicDf["السعر"].type() shouldBe typeOf() - easternArabicDf["الاسم"].type() shouldBe typeOf() // apparently not a char + easternArabicDf["السعر"].type() shouldBe typeOf() + easternArabicDf["الاسم"].type() shouldBe typeOf() // apparently not a char + } } @Test @@ -748,6 +753,44 @@ class DelimCsvTsvTests { DataFrame.parser.resetToDefault() } + // Issue #1047 + @Test + fun `Only use Deephaven datetime parser with custom csv specs`() { + @Language("csv") + val csvContent = + """ + with_timezone_offset,without_timezone_offset + 2024-12-12T13:00:00+01:00,2024-12-12T13:00:00 + """.trimIndent() + + // use DFs parsers by default for datetime-like columns + val df1 = DataFrame.readCsvStr(csvContent) + df1["with_timezone_offset"].let { + it.type() shouldBe typeOf() + it[0] shouldBe Instant.parse("2024-12-12T13:00:00+01:00") + } + df1["without_timezone_offset"].let { + it.type() shouldBe typeOf() + it[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00") + } + + // enable fast datetime parser for the first column with adjustCsvSpecs + val df2 = DataFrame.readCsv( + inputStream = csvContent.byteInputStream(), + adjustCsvSpecs = { + putParserForName("with_timezone_offset", Parsers.DATETIME) + }, + ) + df2["with_timezone_offset"].let { + it.type() shouldBe typeOf() + it[0] shouldBe LocalDateTime.parse("2024-12-12T12:00:00") + } + df2["without_timezone_offset"].let { + it.type() shouldBe typeOf() + it[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00") + } + } + companion object { private val irisDataset = testCsv("irisDataset") private val simpleCsv = testCsv("testCSV") diff --git a/dataframe-geo/build.gradle.kts b/dataframe-geo/build.gradle.kts index 1309ba8e73..3912c15d10 100644 --- a/dataframe-geo/build.gradle.kts +++ b/dataframe-geo/build.gradle.kts @@ -18,6 +18,7 @@ repositories { // geo repository should come before Maven Central maven("https://repo.osgeo.org/repository/release") mavenCentral() + mavenLocal() } // 
https://stackoverflow.com/questions/26993105/i-get-an-error-downloading-javax-media-jai-core1-1-3-from-maven-central diff --git a/docs/StardustDocs/topics/gettingStartedGradleAdvanced.md b/docs/StardustDocs/topics/gettingStartedGradleAdvanced.md index e527d2dc17..3eb1fca8d3 100644 --- a/docs/StardustDocs/topics/gettingStartedGradleAdvanced.md +++ b/docs/StardustDocs/topics/gettingStartedGradleAdvanced.md @@ -124,6 +124,7 @@ dependencies { // Artifact containing all APIs and implementations implementation("org.jetbrains.kotlinx:dataframe-core:%dataFrameVersion%") // Optional formats support + implementation("org.jetbrains.kotlinx:dataframe-csv:%dataFrameVersion%") implementation("org.jetbrains.kotlinx:dataframe-excel:%dataFrameVersion%") implementation("org.jetbrains.kotlinx:dataframe-jdbc:%dataFrameVersion%") implementation("org.jetbrains.kotlinx:dataframe-arrow:%dataFrameVersion%") diff --git a/docs/StardustDocs/topics/overview.md b/docs/StardustDocs/topics/overview.md index b58534fbf8..fe29460432 100644 --- a/docs/StardustDocs/topics/overview.md +++ b/docs/StardustDocs/topics/overview.md @@ -50,7 +50,7 @@ Thus, **Basics:** ```kotlin -val df = DataFrame.readCSV("titanic.csv", delimiter = ';') +val df = DataFrame.readCsv("titanic.csv", delimiter = ';') ``` ```kotlin diff --git a/docs/StardustDocs/topics/read.md b/docs/StardustDocs/topics/read.md index e664f90587..7843c90a63 100644 --- a/docs/StardustDocs/topics/read.md +++ b/docs/StardustDocs/topics/read.md @@ -17,30 +17,20 @@ The input string can be a file path or URL. ## Read from CSV -To read a CSV file, use the `.readCSV()` function. +Before you can read data from CSV, make sure you have the following dependency: + +```kotlin +implementation("org.jetbrains.kotlinx:dataframe-csv:$dataframe_version") +``` + +It's included by default if you have `org.jetbrains.kotlinx:dataframe:$dataframe_version` already. + +To read a CSV file, use the `.readCsv()` function. 
+ +Since DataFrame v0.15, this new CSV integration is available. +It is faster and more flexible than the old one, now being based on +[Deephaven CSV](https://github.com/deephaven/deephaven-csv). -> Since DataFrame v0.15, a new experimental CSV integration is available. -> It is faster and more flexible than the old one, now being based on -> [Deephaven CSV](https://github.com/deephaven/deephaven-csv). -> -> To try it in your Kotlin project, add the dependency: -> -> [`org.jetbrains.kotlinx:dataframe-csv:$dataframe_version`](https://central.sonatype.com/artifact/org.jetbrains.kotlinx/dataframe-csv). -> -> To try it in your Kotlin Notebook, modify the %use-magic directive: -> -> `%use dataFrame(enableExperimentalCsv=true)`. -> -> And then use the new `DataFrame.readCsv()` / `DataFrame.readTsv()` / `DataFrame.readDelim()` -> functions over the old `DataFrame.readCSV()` ones. -> -> The documentation of the old CSV integration still applies to the new one. -> We will expand it while the new CSV integration stabilizes. -> -> In the meantime, check out this -> [example notebook](https://github.com/Kotlin/dataframe/blob/0.15.0/examples/notebooks/feature_overviews/0.15/new_features.ipynb) -> to see the new CSV integration in action. -> {style="note"} To read a CSV file from a file: @@ -48,9 +38,9 @@ To read a CSV file from a file: ```kotlin import java.io.File -DataFrame.readCSV("input.csv") +DataFrame.readCsv("input.csv") // Alternatively -DataFrame.readCSV(File("input.csv")) +DataFrame.readCsv(File("input.csv")) ``` To read a CSV file from a URL: @@ -58,7 +48,22 @@ To read a CSV file from a URL: ```kotlin import java.net.URL -DataFrame.readCSV(URL("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")) +DataFrame.readCsv(URL("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")) +``` + +Zip and GZip files are supported as well. 
+ +To read CSV from `String`: + +```kotlin +val csv = """ + A,B,C,D + 12,tuv,0.12,true + 41,xyz,3.6,not assigned + 89,abc,7.1,false +""".trimIndent() + +DataFrame.readCsvStr(csv) ``` ### Specify delimiter @@ -68,7 +73,7 @@ By default, CSV files are parsed using `,` as the delimiter. To specify a custom ```kotlin -val df = DataFrame.readCSV( +val df = DataFrame.readCsv( file, delimiter = '|', header = listOf("A", "B", "C", "D"), @@ -78,9 +83,19 @@ val df = DataFrame.readCSV( +Aside from the delimiter, there are many other parameters to change. +These include the header, the number of rows to skip, the number of rows to read, the quote character, and more. +Check out the KDocs for more information. + ### Column type inference from CSV -Column types are inferred from the CSV data. Suppose that the CSV from the previous +Column types are inferred from the CSV data. + +We rely on the fast implementation of [Deephaven CSV](https://github.com/deephaven/deephaven-csv) for inferring and +parsing to (nullable) `Int`, `Long`, `Double`, and `Boolean` types. +For other types we fall back to [the parse operation](parse.md). + +Suppose that the CSV from the previous example had the following content: @@ -99,7 +114,7 @@ C: Double D: Boolean? ``` -[`DataFrame`](DataFrame.md) tries to parse columns as JSON, so when reading the following table with JSON object in column D: +[`DataFrame`](DataFrame.md) can [parse](parse.md) columns as JSON too, so when reading the following table with JSON object in column D:
@@ -107,7 +122,7 @@ D: Boolean?
AD
41{"B":3,"C":2}
-We get this data schema where D is [`ColumnGroup`](DataColumn.md#columngroup) with 2 children columns: +We get this data schema where D is [`ColumnGroup`](DataColumn.md#columngroup) with two nested columns: ```text A: Int @@ -141,15 +156,15 @@ Sometimes columns in your CSV can be interpreted differently depending on your s 41,111 -Here a comma can be decimal or thousands separator, thus different values. -You can deal with it in two ways: +Here a comma can be either a decimal or a thousands separator, and thus yield different values. +You can deal with it in multiple ways, for instance: -1) Provide locale as a parser option +1) Provide locale as a parser option ```kotlin -val df = DataFrame.readCSV( +val df = DataFrame.readCsv( file, parserOptions = ParserOptions(locale = Locale.UK), ) @@ -162,7 +177,7 @@ val df = DataFrame.readCSV( ```kotlin -val df = DataFrame.readCSV( +val df = DataFrame.readCsv( file, colTypes = mapOf("colName" to ColType.String), ) @@ -184,28 +199,31 @@ Like: Because the format here `"dd/MMM/yy h:mm a"` differs from the default (`ISO_LOCAL_DATE_TIME`), columns like this may be recognized as simple `String` values rather than actual date-time columns. -You can fix this whenever you [parse](parse.md) a string-based column (e.g., using [`DataFrame.readCSV()`](read.md#read-from-csv), -[`DataFrame.readTSV()`](read.md#read-from-csv), or [`DataColumn.convertTo<>()`](convert.md)) by providing -a custom date-time pattern. There are two ways to do this: +You can fix this whenever you [parse](parse.md) a string-based column (e.g., using [`DataFrame.readCsv()`](read.md#read-from-csv), +[`DataFrame.readTsv()`](read.md#read-from-csv), or [`DataColumn.convertTo<>()`](convert.md)) by providing +a custom date-time pattern. 
+ +There are two ways to do this: 1) By providing the date-time pattern as raw string to the `ParserOptions` argument: - + ```kotlin -val df = DataFrame.readCSV( +val df = DataFrame.readCsv( file, parserOptions = ParserOptions(dateTimePattern = "dd/MMM/yy h:mm a") ) ``` + 2) By providing a `DateTimeFormatter` to the `ParserOptions` argument: - + ```kotlin -val df = DataFrame.readCSV( +val df = DataFrame.readCsv( file, parserOptions = ParserOptions(dateTimeFormatter = DateTimeFormatter.ofPattern("dd/MMM/yy h:mm a")) ) @@ -222,9 +240,53 @@ The result will be a dataframe with properly parsed `DateTime` columns. > > For more details on the parse operation, see the [`parse operation`](parse.md). +### Provide a default type for all columns + +While you can provide a `ColType` per column, you might not +always know how many columns there are or what their names are. +In such cases, you can disable type inference for all columns +by providing a default type for all columns: + + + +```kotlin +val df = DataFrame.readCsv( + file, + colTypes = mapOf(ColType.DEFAULT to ColType.String), +) +``` + + + +This default can be combined with specific types for other columns as well. + +### Unlocking Deephaven CSV features + +For each group of functions (`readCsv`, `readDelim`, `readTsv`, etc.) +we provide one overload which has the `adjustCsvSpecs` parameter. +This is an advanced option because it exposes the +[CsvSpecs.Builder](https://github.com/deephaven/deephaven-csv/blob/main/src/main/java/io/deephaven/csv/CsvSpecs.java) +of the underlying Deephaven implementation. +Generally, we don't recommend using this feature unless there's no other way to achieve your goal. 
+ +For example, to enable the (unconfigurable but) very fast [ISO DateTime Parser of Deephaven CSV](https://medium.com/@deephavendatalabs/a-high-performance-csv-reader-with-type-inference-4bf2e4baf2d1): + + + +```kotlin +val df = DataFrame.readCsv( + inputStream = file.openStream(), + adjustCsvSpecs = { // it: CsvSpecs.Builder + it.putParserForName("date", Parsers.DATETIME) + }, +) +``` + + + ## Read from JSON -To read a JSON file, use the `.readJSON()` function. JSON files can be read from a file or a URL. +To read a JSON file, use the `.readJson()` function. JSON files can be read from a file or a URL. Note that after reading a JSON with a complex structure, you can get hierarchical [`DataFrame`](DataFrame.md): [`DataFrame`](DataFrame.md) with `ColumnGroup`s and [`FrameColumn`](DataColumn.md#framecolumn)s. @@ -452,6 +514,8 @@ Before you can read data from Excel, add the following dependency: implementation("org.jetbrains.kotlinx:dataframe-excel:$dataframe_version") ``` +It's included by default if you have `org.jetbrains.kotlinx:dataframe:$dataframe_version` already. + To read an Excel spreadsheet, use the `.readExcel()` function. Excel spreadsheets can be read from a file or a URL. Supported Excel spreadsheet formats are: xls, xlsx. @@ -502,6 +566,8 @@ Before you can read data from Apache Arrow format, add the following dependency: implementation("org.jetbrains.kotlinx:dataframe-arrow:$dataframe_version") ``` +It's included by default if you have `org.jetbrains.kotlinx:dataframe:$dataframe_version` already. 
+ To read Apache Arrow formats, use the `.readArrowFeather()` function: diff --git a/docs/StardustDocs/topics/schemasGradle.md b/docs/StardustDocs/topics/schemasGradle.md index 07ce324100..0296bcc0f7 100644 --- a/docs/StardustDocs/topics/schemasGradle.md +++ b/docs/StardustDocs/topics/schemasGradle.md @@ -141,8 +141,8 @@ After `assemble`, the following code should compile and run: ```kotlin -// Repository.readCSV() has argument 'path' with default value https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv -val df = Repository.readCSV() +// Repository.readCsv() has argument 'path' with default value https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv +val df = Repository.readCsv() // Use generated properties to access data in rows df.maxBy { stargazersCount }.print() // Or to access columns in dataframe. diff --git a/docs/StardustDocs/topics/write.md b/docs/StardustDocs/topics/write.md index aef2404fff..74895026c2 100644 --- a/docs/StardustDocs/topics/write.md +++ b/docs/StardustDocs/topics/write.md @@ -13,7 +13,7 @@ Values of [`ColumnGroup`](DataColumn.md#columngroup), [`FrameColumn`](DataColumn ```kotlin -df.writeCSV(file) +df.writeCsv(file) ``` @@ -21,8 +21,7 @@ df.writeCSV(file) ```kotlin -val format = CSVFormat.DEFAULT.builder().setDelimiter(';').setRecordSeparator(System.lineSeparator()).build() -val csvStr = df.toCsv(format) +val csvStr = df.toCsvStr(delimiter = ';', recordSeparator = System.lineSeparator()) ``` diff --git a/examples/idea-examples/titanic/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/titanic/ml/titanic.kt b/examples/idea-examples/titanic/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/titanic/ml/titanic.kt index 9ea9120de0..b29fd1625a 100644 --- a/examples/idea-examples/titanic/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/titanic/ml/titanic.kt +++ 
b/examples/idea-examples/titanic/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/titanic/ml/titanic.kt @@ -32,7 +32,7 @@ fun main() { // Set Locale for correct number parsing Locale.setDefault(Locale.FRANCE) - val df = Passenger.readCSV() + val df = Passenger.readCsv() // Calculating imputing values val (train, test) = df diff --git a/examples/notebooks/netflix/netflix.ipynb b/examples/notebooks/netflix/netflix.ipynb index a3e3348f31..ff4047451d 100644 --- a/examples/notebooks/netflix/netflix.ipynb +++ b/examples/notebooks/netflix/netflix.ipynb @@ -23323,7 +23323,7 @@ } }, "source": [ - "val countries = DataFrame.readCSV(\"country_codes.csv\")\n", + "val countries = DataFrame.readCsv(\"country_codes.csv\")\n", "countries.head()" ], "outputs": [ diff --git a/examples/notebooks/titanic/Titanic.ipynb b/examples/notebooks/titanic/Titanic.ipynb index da41503aa9..e178f88cac 100644 --- a/examples/notebooks/titanic/Titanic.ipynb +++ b/examples/notebooks/titanic/Titanic.ipynb @@ -24,7 +24,7 @@ } }, "source": [ - "val df = DataFrame.readCSV(\n", + "val df = DataFrame.readCsv(\n", " fileOrUrl = \"../../idea-examples/titanic/src/main/resources/titanic.csv\",\n", " delimiter = ';',\n", " parserOptions = ParserOptions(locale = java.util.Locale.FRENCH),\n", diff --git a/examples/notebooks/wine/WineNetWIthKotlinDL.ipynb b/examples/notebooks/wine/WineNetWIthKotlinDL.ipynb index 88ee31f739..7af7773629 100644 --- a/examples/notebooks/wine/WineNetWIthKotlinDL.ipynb +++ b/examples/notebooks/wine/WineNetWIthKotlinDL.ipynb @@ -48,7 +48,7 @@ } }, "source": [ - "val rawDf = DataFrame.readCSV(fileOrUrl = \"winequality-red.csv\", delimiter = ';')\n", + "val rawDf = DataFrame.readCsv(fileOrUrl = \"winequality-red.csv\", delimiter = ';')\n", "rawDf.head()" ], "outputs": [ diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 7923a0c8e2..ae4155c3d1 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -13,7 +13,7 @@ libsPublisher = "1.9.23-dev-45" 
# "Bootstrap" version of the dataframe, used in the build itself to generate @DataSchema APIs, # dogfood Gradle / KSP plugins in tests and idea-examples modules -dataframe = "0.15.0-RC2" +dataframe = "0.16.0-dev-5772" korro = "0.1.6" binaryCompatibilityValidator = "0.16.3" diff --git a/plugins/dataframe-gradle-plugin/build.gradle.kts b/plugins/dataframe-gradle-plugin/build.gradle.kts index aaa0bd45c6..52253325a5 100644 --- a/plugins/dataframe-gradle-plugin/build.gradle.kts +++ b/plugins/dataframe-gradle-plugin/build.gradle.kts @@ -22,6 +22,7 @@ dependencies { implementation(project(":dataframe-arrow")) implementation(project(":dataframe-openapi-generator")) implementation(project(":dataframe-excel")) + implementation(project(":dataframe-csv")) implementation(project(":dataframe-jdbc")) implementation(libs.kotlin.gradle.plugin.api) @@ -53,11 +54,13 @@ tasks.withType { filter { it.replace( "%DATAFRAME_JAR%", - project(":core").configurations - .getByName("instrumentedJars") - .artifacts.single() - .file.absolutePath - .replace(File.separatorChar, '/'), + listOf(":core", ":dataframe-csv").joinToString("\", \"") { + project(it).configurations + .getByName("instrumentedJars") + .artifacts.single() + .file.absolutePath + .replace(File.separatorChar, '/') + }, ) } } @@ -115,12 +118,14 @@ val integrationTestTask = task("integrationTest") { dependsOn(":plugins:symbol-processor:publishToMavenLocal") dependsOn(":dataframe-arrow:publishToMavenLocal") dependsOn(":dataframe-excel:publishToMavenLocal") + dependsOn(":dataframe-csv:publishToMavenLocal") dependsOn(":dataframe-jdbc:publishToMavenLocal") dependsOn(":dataframe-openapi-generator:publishToMavenLocal") dependsOn(":dataframe-openapi:publishToMavenLocal") dependsOn(":publishApiPublicationToMavenLocal") dependsOn(":dataframe-arrow:publishDataframeArrowPublicationToMavenLocal") dependsOn(":dataframe-excel:publishDataframeExcelPublicationToMavenLocal") + dependsOn(":dataframe-csv:publishDataframeCsvPublicationToMavenLocal") 
dependsOn(":dataframe-jdbc:publishDataframeJDBCPublicationToMavenLocal") dependsOn(":dataframe-openapi-generator:publishDataframeOpenApiPublicationToMavenLocal") dependsOn(":plugins:symbol-processor:publishMavenPublicationToMavenLocal") diff --git a/plugins/dataframe-gradle-plugin/src/integrationTest/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPluginIntegrationTest.kt b/plugins/dataframe-gradle-plugin/src/integrationTest/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPluginIntegrationTest.kt index 852e890710..02a4d2fe91 100644 --- a/plugins/dataframe-gradle-plugin/src/integrationTest/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPluginIntegrationTest.kt +++ b/plugins/dataframe-gradle-plugin/src/integrationTest/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPluginIntegrationTest.kt @@ -358,7 +358,7 @@ class SchemaGeneratorPluginIntegrationTest : AbstractDataFramePluginIntegrationT import org.jetbrains.kotlinx.dataframe.api.filter fun main() { - val df = MySchema.readCSV() + val df = MySchema.readCsv() val df1 = df.filter { age != null } } """.trimIndent(), diff --git a/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/GenerateDataSchemaTask.kt b/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/GenerateDataSchemaTask.kt index 63c3de6522..8dbfa3f996 100644 --- a/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/GenerateDataSchemaTask.kt +++ b/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/GenerateDataSchemaTask.kt @@ -18,11 +18,11 @@ import org.jetbrains.kotlinx.dataframe.impl.codeGen.toStandaloneSnippet import org.jetbrains.kotlinx.dataframe.impl.codeGen.urlCodeGenReader import org.jetbrains.kotlinx.dataframe.impl.codeGen.urlDfReader import org.jetbrains.kotlinx.dataframe.io.ArrowFeather -import org.jetbrains.kotlinx.dataframe.io.CSV +import org.jetbrains.kotlinx.dataframe.io.CsvDeephaven import 
org.jetbrains.kotlinx.dataframe.io.Excel import org.jetbrains.kotlinx.dataframe.io.JSON import org.jetbrains.kotlinx.dataframe.io.OpenApi -import org.jetbrains.kotlinx.dataframe.io.TSV +import org.jetbrains.kotlinx.dataframe.io.TsvDeephaven import org.jetbrains.kotlinx.dataframe.io.getSchemaForSqlQuery import org.jetbrains.kotlinx.dataframe.io.getSchemaForSqlTable import org.jetbrains.kotlinx.dataframe.io.isUrl @@ -116,11 +116,10 @@ abstract class GenerateDataSchemaTask : DefaultTask() { val url = urlOf(data.get()) val formats = listOf( - // TODO new Csv() and Tsv() - CSV(delimiter = csvOptions.delimiter), + CsvDeephaven(delimiter = csvOptions.delimiter), JSON(typeClashTactic = jsonOptions.typeClashTactic, keyValuePaths = jsonOptions.keyValuePaths), Excel(), - TSV(), + TsvDeephaven(), ArrowFeather(), OpenApi(), ) diff --git a/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt b/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt index 785f2644ff..68f71b239f 100644 --- a/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt +++ b/plugins/dataframe-gradle-plugin/src/test/kotlin/org/jetbrains/dataframe/gradle/DataFrameReadTest.kt @@ -9,6 +9,7 @@ import kotlinx.serialization.SerializationException import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.isEmpty import org.jetbrains.kotlinx.dataframe.io.read +import org.jetbrains.kotlinx.dataframe.io.readCsv import org.jetbrains.kotlinx.dataframe.io.readSqlTable import org.junit.Test import java.io.File @@ -82,7 +83,7 @@ class DataFrameReadTest { @Test fun `data accessible and readable`() { shouldNotThrowAny { - DataFrame.read(Paths.get("../../data/jetbrains repositories.csv").absolutePathString()) + DataFrame.readCsv(Paths.get("../../data/jetbrains repositories.csv").absolutePathString(), skipLines = 1) } } diff --git 
a/plugins/kotlin-dataframe/build.gradle.kts b/plugins/kotlin-dataframe/build.gradle.kts index 11a6d3e7ac..1e66f93118 100644 --- a/plugins/kotlin-dataframe/build.gradle.kts +++ b/plugins/kotlin-dataframe/build.gradle.kts @@ -41,6 +41,7 @@ dependencies { implementation(project(":core")) implementation(project(":dataframe-excel")) + implementation(project(":dataframe-csv")) api(libs.kotlinLogging) api("org.jetbrains.kotlinx:kotlinx-serialization-json:1.7.1") diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt index 285bf52ac0..0b8d99a3d5 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt @@ -3,9 +3,6 @@ package org.jetbrains.kotlinx.dataframe.plugin.impl.api import kotlinx.serialization.json.Json import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.plugin.impl.AbstractInterpreter -import org.jetbrains.kotlinx.dataframe.plugin.impl.Arguments -import org.jetbrains.kotlinx.dataframe.plugin.impl.Present import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.io.JSON import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS @@ -17,8 +14,11 @@ import org.jetbrains.kotlinx.dataframe.io.readDelimStr import org.jetbrains.kotlinx.dataframe.io.readExcel import org.jetbrains.kotlinx.dataframe.io.readJson import org.jetbrains.kotlinx.dataframe.io.readJsonStr +import org.jetbrains.kotlinx.dataframe.plugin.impl.AbstractInterpreter import org.jetbrains.kotlinx.dataframe.plugin.impl.AbstractSchemaModificationInterpreter +import org.jetbrains.kotlinx.dataframe.plugin.impl.Arguments import org.jetbrains.kotlinx.dataframe.plugin.impl.PluginDataFrameSchema +import 
org.jetbrains.kotlinx.dataframe.plugin.impl.Present import org.jetbrains.kotlinx.dataframe.plugin.impl.data.IoSchema import org.jetbrains.kotlinx.dataframe.plugin.impl.data.deserializeToPluginDataFrameSchema import org.jetbrains.kotlinx.dataframe.plugin.impl.data.toPluginDataFrameSchema @@ -37,6 +37,7 @@ internal class Read0 : AbstractInterpreter() { } } +// TODO migrate to dataframe-csv internal class ReadCSV0 : AbstractInterpreter() { val Arguments.fileOrUrl: String by arg() val Arguments.delimiter: Char by arg(defaultValue = Present(',')) @@ -109,6 +110,7 @@ private sealed interface DataSource private class UrlOrAbsolutePath(val path: String) : DataSource private class ResolutionDirFile(val file: File) : DataSource +// TODO migrate to dataframe-csv internal class ReadDelimStr : AbstractInterpreter() { val Arguments.text: String by arg() val Arguments.delimiter: Char by arg(defaultValue = Present(',')) @@ -116,7 +118,7 @@ internal class ReadDelimStr : AbstractInterpreter() { val Arguments.readLines: Int? 
by arg(defaultValue = Present(null)) override fun Arguments.interpret(): PluginDataFrameSchema { - return DataFrame.readDelimStr(text, delimiter, skipLines = skipLines, readLines = readLines).schema().toPluginDataFrameSchema() + return DataFrame.readDelimStr(text, delimiter, skipLines = skipLines.toLong(), readLines = readLines?.toLong()).schema().toPluginDataFrameSchema() } } diff --git a/plugins/kotlin-dataframe/testData/box/readDelimStr_delimiter.kt b/plugins/kotlin-dataframe/testData/box/readDelimStr_delimiter.kt deleted file mode 100644 index cab1d7b0b5..0000000000 --- a/plugins/kotlin-dataframe/testData/box/readDelimStr_delimiter.kt +++ /dev/null @@ -1,16 +0,0 @@ -import org.jetbrains.kotlinx.dataframe.* -import org.jetbrains.kotlinx.dataframe.annotations.* -import org.jetbrains.kotlinx.dataframe.api.* -import org.jetbrains.kotlinx.dataframe.io.* - -fun box(): String { - val tsv = """ - a b c - 1 2 3 - """ - val df = DataFrame.readDelimStr(tsv, '\t') - df.a - df.b - df.c - return "OK" -} diff --git a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java index 256a201833..7f090e436a 100644 --- a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java +++ b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java @@ -412,12 +412,6 @@ public void testReadCSV() { runTest("testData/box/readCSV.kt"); } - @Test - @TestMetadata("readDelimStr_delimiter.kt") - public void testReadDelimStr_delimiter() { - runTest("testData/box/readDelimStr_delimiter.kt"); - } - @Test @TestMetadata("readExcel.kt") public void testReadExcel() { diff --git a/plugins/symbol-processor/build.gradle.kts b/plugins/symbol-processor/build.gradle.kts index e2f68dacf8..c4712b7bf7 100644 --- 
a/plugins/symbol-processor/build.gradle.kts +++ b/plugins/symbol-processor/build.gradle.kts @@ -22,6 +22,7 @@ dependencies { implementation(project(":dataframe-arrow")) implementation(project(":dataframe-openapi-generator")) implementation(project(":dataframe-excel")) + implementation(project(":dataframe-csv")) implementation(project(":dataframe-jdbc")) implementation(libs.ksp.api) implementation(libs.kotlin.reflect) diff --git a/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataSchemaGenerator.kt b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataSchemaGenerator.kt index b5cd07d964..b0290f9c14 100644 --- a/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataSchemaGenerator.kt +++ b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataSchemaGenerator.kt @@ -23,11 +23,11 @@ import org.jetbrains.kotlinx.dataframe.impl.codeGen.toStandaloneSnippet import org.jetbrains.kotlinx.dataframe.impl.codeGen.urlCodeGenReader import org.jetbrains.kotlinx.dataframe.impl.codeGen.urlDfReader import org.jetbrains.kotlinx.dataframe.io.ArrowFeather -import org.jetbrains.kotlinx.dataframe.io.CSV +import org.jetbrains.kotlinx.dataframe.io.CsvDeephaven import org.jetbrains.kotlinx.dataframe.io.Excel import org.jetbrains.kotlinx.dataframe.io.JSON import org.jetbrains.kotlinx.dataframe.io.OpenApi -import org.jetbrains.kotlinx.dataframe.io.TSV +import org.jetbrains.kotlinx.dataframe.io.TsvDeephaven import org.jetbrains.kotlinx.dataframe.io.databaseCodeGenReader import org.jetbrains.kotlinx.dataframe.io.db.driverClassNameFromUrl import org.jetbrains.kotlinx.dataframe.io.getSchemaForSqlQuery @@ -155,14 +155,13 @@ class DataSchemaGenerator( codeGenerator.createNewFile(Dependencies(true, importStatement.origin), packageName, "$name.Generated") val formats = listOf( - // TODO new Csv() and Tsv() - CSV(delimiter = importStatement.csvOptions.delimiter), + CsvDeephaven(delimiter = 
importStatement.csvOptions.delimiter), JSON( typeClashTactic = importStatement.jsonOptions.typeClashTactic, keyValuePaths = importStatement.jsonOptions.keyValuePaths.map(::JsonPath), ), Excel(), - TSV(), + TsvDeephaven(), ArrowFeather(), OpenApi(), ) diff --git a/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessorTest.kt b/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessorTest.kt index 3d943c3378..61c34ca3fc 100644 --- a/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessorTest.kt +++ b/plugins/symbol-processor/src/test/kotlin/org/jetbrains/dataframe/ksp/DataFrameSymbolProcessorTest.kt @@ -1043,7 +1043,7 @@ class DataFrameSymbolProcessorTest { result.successfulCompilation shouldBe false } - private val jetbrainsCsv = File("../../data/jetbrains repositories.csv") + private val jetbrainsCsv = File("../../data/jetbrains_repositories.csv") @Test fun `imported schema resolved`() { @@ -1063,7 +1063,7 @@ class DataFrameSymbolProcessorTest { import org.jetbrains.kotlinx.dataframe.annotations.CsvOptions import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema - fun resolve() = Schema.readCSV() + fun resolve() = Schema.readCsv() """.trimIndent(), ), ), diff --git a/tests/build.gradle.kts b/tests/build.gradle.kts index b9a6c60e28..69d517b6ff 100644 --- a/tests/build.gradle.kts +++ b/tests/build.gradle.kts @@ -21,6 +21,7 @@ repositories { dependencies { implementation(project(":core")) implementation(project(":dataframe-excel")) + implementation(project(":dataframe-csv")) implementation(project(":dataframe-jdbc")) implementation(project(":dataframe-arrow")) testImplementation(libs.junit) diff --git a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt index 6d1b631f69..ffb27ae5fd 100644 --- 
a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt +++ b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt @@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.samples.api +import io.deephaven.csv.parsers.Parsers import io.kotest.matchers.shouldBe import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow @@ -11,7 +12,7 @@ import org.jetbrains.kotlinx.dataframe.api.columnTypes import org.jetbrains.kotlinx.dataframe.io.ColType import org.jetbrains.kotlinx.dataframe.io.StringColumns import org.jetbrains.kotlinx.dataframe.io.readArrowFeather -import org.jetbrains.kotlinx.dataframe.io.readCSV +import org.jetbrains.kotlinx.dataframe.io.readCsv import org.jetbrains.kotlinx.dataframe.io.readExcel import org.jetbrains.kotlinx.dataframe.io.readJson import org.jetbrains.kotlinx.dataframe.testArrowFeather @@ -19,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.testCsv import org.jetbrains.kotlinx.dataframe.testJson import org.junit.Ignore import org.junit.Test +import java.time.format.DateTimeFormatter import java.util.Locale import kotlin.reflect.typeOf @@ -27,7 +29,7 @@ class Read { fun readCsvCustom() { val file = testCsv("syntheticSample") // SampleStart - val df = DataFrame.readCSV( + val df = DataFrame.readCsv( file, delimiter = '|', header = listOf("A", "B", "C", "D"), @@ -85,7 +87,7 @@ class Read { fun readNumbersWithSpecificLocale() { val file = testCsv("numbers") // SampleStart - val df = DataFrame.readCSV( + val df = DataFrame.readCsv( file, parserOptions = ParserOptions(locale = Locale.UK), ) @@ -96,10 +98,58 @@ class Read { fun readNumbersWithColType() { val file = testCsv("numbers") // SampleStart - val df = DataFrame.readCSV( + val df = DataFrame.readCsv( file, colTypes = mapOf("colName" to ColType.String), ) // SampleEnd } + + @Test + fun readDatesWithSpecificDateTimePattern() { + val file = testCsv("dates") + // SampleStart + val df = DataFrame.readCsv( + file, + 
parserOptions = ParserOptions(dateTimePattern = "dd/MMM/yy h:mm a") + ) + // SampleEnd + } + + @Test + fun readDatesWithSpecificDateTimeFormatter() { + val file = testCsv("dates") + // SampleStart + val df = DataFrame.readCsv( + file, + parserOptions = ParserOptions(dateTimeFormatter = DateTimeFormatter.ofPattern("dd/MMM/yy h:mm a")) + ) + // SampleEnd + } + + @Test + fun readDatesWithDefaultType() { + val file = testCsv("dates") + // SampleStart + val df = DataFrame.readCsv( + file, + colTypes = mapOf(ColType.DEFAULT to ColType.String), + ) + // SampleEnd + } + + @Test + fun readDatesWithDeephavenDateTimeParser() { + val file = testCsv("dates") + try { + // SampleStart + val df = DataFrame.readCsv( + inputStream = file.openStream(), + adjustCsvSpecs = { // it: CsvSpecs.Builder + it.putParserForName("date", Parsers.DATETIME) + }, + ) + // SampleEnd + } catch (_: Exception) {} + } } diff --git a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt index a6eae3c2ac..47df5ec689 100644 --- a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt +++ b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt @@ -4,7 +4,6 @@ package org.jetbrains.kotlinx.dataframe.samples.api import io.kotest.matchers.string.shouldStartWith import org.apache.arrow.vector.types.pojo.Schema -import org.apache.commons.csv.CSVFormat import org.apache.poi.ss.usermodel.Sheet import org.apache.poi.ss.usermodel.WorkbookFactory import org.jetbrains.kotlinx.dataframe.api.filter @@ -13,11 +12,11 @@ import org.jetbrains.kotlinx.dataframe.io.ArrowWriter import org.jetbrains.kotlinx.dataframe.io.arrowWriter import org.jetbrains.kotlinx.dataframe.io.saveArrowFeatherToByteArray import org.jetbrains.kotlinx.dataframe.io.saveArrowIPCToByteArray -import org.jetbrains.kotlinx.dataframe.io.toCsv +import org.jetbrains.kotlinx.dataframe.io.toCsvStr import 
org.jetbrains.kotlinx.dataframe.io.toJson import org.jetbrains.kotlinx.dataframe.io.writeArrowFeather import org.jetbrains.kotlinx.dataframe.io.writeArrowIPC -import org.jetbrains.kotlinx.dataframe.io.writeCSV +import org.jetbrains.kotlinx.dataframe.io.writeCsv import org.jetbrains.kotlinx.dataframe.io.writeExcel import org.jetbrains.kotlinx.dataframe.io.writeJson import org.jetbrains.kotlinx.dataframe.io.writeMismatchMessage @@ -31,7 +30,7 @@ class Write : TestBase() { fun writeCsv() { useTempFile { file -> // SampleStart - df.writeCSV(file) + df.writeCsv(file) // SampleEnd } } @@ -48,8 +47,7 @@ class Write : TestBase() { @Test fun writeCsvStr() { // SampleStart - val format = CSVFormat.DEFAULT.builder().setDelimiter(';').setRecordSeparator(System.lineSeparator()).build() - val csvStr = df.toCsv(format) + val csvStr = df.toCsvStr(delimiter = ';', recordSeparator = System.lineSeparator()) // SampleEnd csvStr shouldStartWith """ name;age;city;weight;isHappy diff --git a/tests/src/test/resources/dates.csv b/tests/src/test/resources/dates.csv new file mode 100644 index 0000000000..6f0584be32 --- /dev/null +++ b/tests/src/test/resources/dates.csv @@ -0,0 +1,3 @@ +date +13/Jan/23 11:49 AM +14/Mar/23 5:35 PM