From cddad9cd8e5b6568ee35397113b8938e20409acf Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 13 May 2026 17:28:01 +0200 Subject: [PATCH 01/20] PoC for DataFrameReadSource --- core/build.gradle.kts | 6 +- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 146 ++++++++++++++++++ .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 58 +++++++ .../jetbrains/kotlinx/dataframe/io/json.kt | 98 ++++++++++++ ...s.kotlinx.dataframe.io.DataFrameReadSource | 1 + 5 files changed, 308 insertions(+), 1 deletion(-) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt create mode 100644 core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt create mode 100644 dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 60e39459a5..3a4e0cf0f1 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -76,7 +76,6 @@ dependencies { testImplementation(libs.kotlin.scriptingJvm) testImplementation(libs.jsoup) testImplementation(libs.sl4jsimple) - testImplementation(projects.dataframeJson) testImplementation(libs.serialization.core) testImplementation(libs.serialization.json) @@ -85,6 +84,11 @@ dependencies { // for samples.api testImplementation(projects.dataframeCsv) + testImplementation(projects.dataframeJson) + testImplementation(projects.dataframeArrow) + testImplementation(projects.dataframeExcel) +// testImplementation(projects.dataframeGeo) + testImplementation(projects.dataframeJdbc) } // Configure testJava16 dependencies to extend from test diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt new file mode 100644 index 0000000000..1a02e66cf2 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -0,0 +1,146 @@ +package org.jetbrains.kotlinx.dataframe.io + +import 
org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import java.io.File +import java.io.FileNotFoundException +import java.net.URL +import java.nio.file.Path +import java.util.ServiceLoader +import kotlin.io.extension +import kotlin.io.path.extension +import kotlin.reflect.KType +import kotlin.reflect.full.withNullability +import kotlin.reflect.typeOf + +public interface DataFrameReadOptions + +public interface DataFrameReadSource { + public fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions? = null, + ): DataFrame<*>? + + public fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean + + // `DataFrame.Companion.read` methods uses this to sort list of all supported formats in ascending order (-1, 2, 10) + // sorted list is used to test if any format can read given input + public val testOrder: Int +} + +public data class DataSourceInfo( + public val type: DataSourceType, + public val extension: String? = null, + // TODO, Apache Tika? + public val mimeType: String? = null, +) + +public sealed class DataSourceType(public open val kType: KType) { + /** Like a path, file, or URL. */ + public data class Reference(override val kType: KType) : DataSourceType(kType) + + /** Actual data, like a String, ByteArray, InputStream */ + public data class InMemory(override val kType: KType) : DataSourceType(kType) + + public companion object { + public inline fun reference(): Reference = Reference(kType = typeOf()) + + public inline fun inMemory(): InMemory = InMemory(kType = typeOf()) + } +} + +/** + * NOTE: Needs to have fully qualified name in + * resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.NewSupportedDataFrameFormat + * to be detected here. 
+ */ +internal val newSupportedFormats: List by lazy { + ServiceLoader.load(DataFrameReadSource::class.java) + .toList() + .distinct() + .sortedBy { it.testOrder } +} + +internal fun readDataFrameImpl( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions? = null, + formats: List = newSupportedFormats, +): AnyFrame { + val tries = mutableMapOf() + formats.sortedBy { it.testOrder }.forEach { + if (!it.acceptsSource(sourceInfo, options)) return@forEach + try { + val df = it.readDataFrameOrNull(source, sourceInfo, options) + if (df != null) return df + } catch (e: FileNotFoundException) { + throw e + } catch (e: Exception) { + tries[it::class.simpleName!!] = e + } + } + throw IllegalArgumentException("Unknown DataFrame source $source, $sourceInfo; Tried $tries") +} + +public fun DataFrame.Companion.readReference( + reference: Any, + type: KType, + options: DataFrameReadOptions? = null, +): AnyFrame = + readDataFrameImpl( + source = reference, + sourceInfo = DataSourceInfo( + type = DataSourceType.Reference(type.withNullability(false)), + extension = reference.extensionOrNull(), + mimeType = null, // TODO, Apache Tika? + ), + options = options, + ) + +public inline fun DataFrame.Companion.readReference( + reference: R, + options: DataFrameReadOptions? = null, +): AnyFrame = + readReference( + reference = reference, + type = typeOf(), + options = options, + ) + +internal fun Any.extensionOrNull(): String? = + when (this) { + is Path -> extension + + is File -> extension + + is URL -> path.takeIf { it.isNotBlank() }?.substringAfterLast('.') + + is String -> try { + asUrl(this).extensionOrNull() + } catch (_: Exception) { + null + } + + else -> null + } + +public fun DataFrame.Companion.readFromData(data: Any, type: KType, options: DataFrameReadOptions? = null): AnyFrame = + readDataFrameImpl( + source = data, + sourceInfo = DataSourceInfo( + type = DataSourceType.InMemory(type.withNullability(false)), + mimeType = null, // TODO, Apache Tika? 
+ ), + options = options, + ) + +public inline fun DataFrame.Companion.readFromData( + data: R, + options: DataFrameReadOptions? = null, +): AnyFrame = + readFromData( + data = data, + type = typeOf(), + options = options, + ) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt new file mode 100644 index 0000000000..3691dcbac8 --- /dev/null +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -0,0 +1,58 @@ +package org.jetbrains.kotlinx.dataframe.io + +import io.kotest.matchers.shouldBe +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonElement +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.junit.Test +import java.io.File +import java.net.URI +import java.net.URL +import kotlin.io.path.Path +import kotlin.io.path.absolute + +class Guess2 { + + @Test + fun `read JSON reference`() { + val expected = DataFrame.readJson("../data/participants.json") + + DataFrame.readReference("../data/participants.json") shouldBe expected + DataFrame.readReference(Path("../data/participants.json")) shouldBe expected + DataFrame.readReference(File("../data/participants.json")) shouldBe expected + DataFrame.readReference( + Path("../data/participants.json").absolute().normalize().toUri().toURL(), + ) shouldBe expected + + val options = org.jetbrains.kotlinx.dataframe.io.Json.Options( + typeClashTactic = JSON.TypeClashTactic.ANY_COLUMNS, + ) + + DataFrame.readReference("../data/participants.json", options) shouldBe expected + DataFrame.readReference(Path("../data/participants.json"), options) shouldBe expected + DataFrame.readReference(File("../data/participants.json"), options) shouldBe expected + DataFrame.readReference( + Path("../data/participants.json").absolute().normalize().toUri().toURL(), + options, + ) shouldBe expected + } + + @Test + fun `read JSON in memory`() { + val expected = 
DataFrame.readJson("../data/participants.json") + + val file = File("../data/participants.json") + + DataFrame.readFromData(file.readText()) shouldBe expected + DataFrame.readFromData(file.inputStream()) shouldBe expected + DataFrame.readFromData(Json.decodeFromString(file.readText())) shouldBe expected + + val options = org.jetbrains.kotlinx.dataframe.io.Json.Options( + typeClashTactic = JSON.TypeClashTactic.ANY_COLUMNS, + ) + + DataFrame.readFromData(file.readText(), options) shouldBe expected + DataFrame.readFromData(file.inputStream(), options) shouldBe expected + DataFrame.readFromData(Json.decodeFromString(file.readText()), options) shouldBe expected + } +} diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 4949d1f104..9786d4f8f2 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -23,6 +23,8 @@ import org.jetbrains.kotlinx.dataframe.impl.io.encodeDataFrameWithMetadata import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame import org.jetbrains.kotlinx.dataframe.impl.io.encodeRow import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl +import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON +import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS import java.io.File @@ -30,8 +32,104 @@ import java.io.InputStream import java.net.URL import java.nio.file.Path import kotlin.io.path.writeText +import kotlin.reflect.KType +import kotlin.reflect.full.isSubtypeOf import kotlin.reflect.typeOf +public class Json : DataFrameReadSource { + + public data class Options( + val header: List = emptyList(), + val 
typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, + val keyValuePaths: List = emptyList(), + val unifyNumbers: Boolean = true, + ) : DataFrameReadOptions + + public companion object { + public val supportedReferenceTypes: Set = + setOf(typeOf(), typeOf(), typeOf(), typeOf()) + public val supportedInMemoryTypes: Set = + setOf(typeOf(), typeOf(), typeOf()) + } + + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { + if (options != null && options !is Options) return false + if (sourceInfo.extension?.lowercase()?.equals("json") == false) return false + if (sourceInfo.mimeType?.lowercase()?.equals("application/json") == false) return false + + val kType = sourceInfo.type.kType + return when (sourceInfo.type) { + is DataSourceType.Reference -> + supportedReferenceTypes.any { kType.isSubtypeOf(it) } + + is DataSourceType.InMemory -> + supportedInMemoryTypes.any { kType.isSubtypeOf(it) } + } + } + + @OptIn(ExperimentalSerializationApi::class) + override fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrame<*>? { + val options = (options ?: Options()) as Options + val kType = sourceInfo.type.kType + return when (sourceInfo.type) { + is DataSourceType.Reference -> { + val url = when { + kType.isSubTypeOf() -> (source as? String)?.let(::asUrl) + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } ?: return null + + DataFrame.readJson( + url = url, + header = options.header, + typeClashTactic = options.typeClashTactic, + keyValuePaths = options.keyValuePaths, + unifyNumbers = options.unifyNumbers, + ) + } + + is DataSourceType.InMemory -> { + val element = when { + kType.isSubTypeOf() -> + (source as? InputStream)?.let { + Json.decodeFromStream(it) + } + + kType.isSubTypeOf() -> + (source as? 
String)?.let { + Json.decodeFromString(it) + } + + kType.isSubTypeOf() -> + source as? JsonElement + + else -> null + } ?: return null + + readJsonImpl( + parsed = element, + header = options.header, + typeClashTactic = options.typeClashTactic, + keyValuePaths = options.keyValuePaths, + unifyNumbers = options.unifyNumbers, + ) + } + } + } + + override val testOrder: Int = 10_000 + + override fun toString(): String = "Json" +} + +private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) + public class JSON( private val typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, private val keyValuePaths: List = emptyList(), diff --git a/dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource b/dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource new file mode 100644 index 0000000000..bb9b992aea --- /dev/null +++ b/dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource @@ -0,0 +1 @@ +org.jetbrains.kotlinx.dataframe.io.Json From f1697689bb0fe42f719a9a2639ebd1a44202e5ef Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 14 May 2026 14:16:50 +0200 Subject: [PATCH 02/20] PoC for DataFrameReadSource with csv, tsv and excel support --- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 17 +- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 149 +++++++++++++++++- .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 140 ++++++++++++++++ .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 140 ++++++++++++++++ ...s.kotlinx.dataframe.io.DataFrameReadSource | 2 + .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 135 ++++++++++++++++ ...s.kotlinx.dataframe.io.DataFrameReadSource | 1 + .../jetbrains/kotlinx/dataframe/io/json.kt | 1 + 8 files changed, 582 insertions(+), 3 deletions(-) create mode 100644 dataframe-csv/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource create mode 
100644 dataframe-excel/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 1a02e66cf2..5192ae5017 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -2,8 +2,10 @@ package org.jetbrains.kotlinx.dataframe.io import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame +import java.io.ByteArrayInputStream import java.io.File import java.io.FileNotFoundException +import java.io.InputStream import java.net.URL import java.nio.file.Path import java.util.ServiceLoader @@ -68,11 +70,24 @@ internal fun readDataFrameImpl( options: DataFrameReadOptions? = null, formats: List = newSupportedFormats, ): AnyFrame { + // Some sources can only be read once, like InputStreams, so we need to buffer them + var bufferedSource: Any? 
= null + + fun getSource(): Any = + when (source) { + is InputStream -> { + if (bufferedSource == null) bufferedSource = source.readBytes() + ByteArrayInputStream(bufferedSource as ByteArray) + } + + else -> source + } + val tries = mutableMapOf() formats.sortedBy { it.testOrder }.forEach { if (!it.acceptsSource(sourceInfo, options)) return@forEach try { - val df = it.readDataFrameOrNull(source, sourceInfo, options) + val df = it.readDataFrameOrNull(getSource(), sourceInfo, options) if (df != null) return df } catch (e: FileNotFoundException) { throw e diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index 3691dcbac8..f9bef49023 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -3,11 +3,10 @@ package org.jetbrains.kotlinx.dataframe.io import io.kotest.matchers.shouldBe import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonElement +import org.apache.poi.ss.usermodel.WorkbookFactory import org.jetbrains.kotlinx.dataframe.DataFrame import org.junit.Test import java.io.File -import java.net.URI -import java.net.URL import kotlin.io.path.Path import kotlin.io.path.absolute @@ -55,4 +54,150 @@ class Guess2 { DataFrame.readFromData(file.inputStream(), options) shouldBe expected DataFrame.readFromData(Json.decodeFromString(file.readText()), options) shouldBe expected } + + @Test + fun `read CSV reference`() { + val csvPath = "../data/movies.csv" + val expected = DataFrame.readCsv(csvPath) + + DataFrame.readReference(csvPath) shouldBe expected + DataFrame.readReference(Path(csvPath)) shouldBe expected + DataFrame.readReference(File(csvPath)) shouldBe expected + DataFrame.readReference( + Path(csvPath).absolute().normalize().toUri().toURL(), + ) shouldBe expected + + val options = Csv.Options(delimiter = ',') + + DataFrame.readReference(csvPath, options) 
shouldBe expected + DataFrame.readReference(Path(csvPath), options) shouldBe expected + DataFrame.readReference(File(csvPath), options) shouldBe expected + DataFrame.readReference( + Path(csvPath).absolute().normalize().toUri().toURL(), + options, + ) shouldBe expected + } + + @Test + fun `read CSV in memory`() { + val file = File("../data/movies.csv") + val expected = DataFrame.readCsv(file) + + // String content has no extension hint, so we pin the format via options. + val options = Csv.Options(delimiter = ',') + + DataFrame.readFromData(file.readText(), options) shouldBe expected + DataFrame.readFromData(file.inputStream(), options) shouldBe expected + } + + @Test + fun `read TSV reference`() { + val tsvFile = File("src/test/resources/abc.tsv") + val expected = DataFrame.readTsv(tsvFile) + + DataFrame.readReference(tsvFile.path) shouldBe expected + DataFrame.readReference(Path(tsvFile.path)) shouldBe expected + DataFrame.readReference(tsvFile) shouldBe expected + DataFrame.readReference( + Path(tsvFile.path).absolute().normalize().toUri().toURL(), + ) shouldBe expected + + val options = Tsv.Options(delimiter = '\t') + + DataFrame.readReference(tsvFile.path, options) shouldBe expected + DataFrame.readReference(Path(tsvFile.path), options) shouldBe expected + DataFrame.readReference(tsvFile, options) shouldBe expected + DataFrame.readReference( + Path(tsvFile.path).absolute().normalize().toUri().toURL(), + options, + ) shouldBe expected + } + + @Test + fun `read TSV in memory`() { + val tsvFile = File("src/test/resources/abc.tsv") + val expected = DataFrame.readTsv(tsvFile) + val options = Tsv.Options(delimiter = '\t') + + // Binary/text without extension — options pin Tsv over Csv/Json/Xlsx. 
+ DataFrame.readFromData(tsvFile.readText(), options) shouldBe expected + DataFrame.readFromData(tsvFile.inputStream(), options) shouldBe expected + } + + @Test + fun `read XLSX reference`() { + val xlsxFile = File("src/test/resources/sample2.xlsx") + val expected = DataFrame.readExcel(xlsxFile) + + DataFrame.readReference(xlsxFile.path) shouldBe expected + DataFrame.readReference(Path(xlsxFile.path)) shouldBe expected + DataFrame.readReference(xlsxFile) shouldBe expected + DataFrame.readReference( + Path(xlsxFile.path).absolute().normalize().toUri().toURL(), + ) shouldBe expected + + val options = ExcelNEW.Options(sheetName = "Sheet1") + + DataFrame.readReference(xlsxFile.path, options) shouldBe expected + DataFrame.readReference(Path(xlsxFile.path), options) shouldBe expected + DataFrame.readReference(xlsxFile, options) shouldBe expected + DataFrame.readReference( + Path(xlsxFile.path).absolute().normalize().toUri().toURL(), + options, + ) shouldBe expected + } + + @Test + fun `read XLS reference`() { + val xlsFile = File("src/test/resources/sample.xls") + val expected = DataFrame.readExcel(xlsFile) + + DataFrame.readReference(xlsFile.path) shouldBe expected + DataFrame.readReference(Path(xlsFile.path)) shouldBe expected + DataFrame.readReference(xlsFile) shouldBe expected + DataFrame.readReference( + Path(xlsFile.path).absolute().normalize().toUri().toURL(), + ) shouldBe expected + } + + @Test + fun `read XLSX in memory`() { + val xlsxFile = File("src/test/resources/sample2.xlsx") + val expected = DataFrame.readExcel(xlsxFile) + + // Workbook and Sheet are exclusive to ExcelNEW, so type-based dispatch works without options. 
+ WorkbookFactory.create(xlsxFile.inputStream()).use { wb -> + DataFrame.readFromData(wb) shouldBe expected + DataFrame.readFromData(wb.getSheetAt(0)) shouldBe expected + } + + val options = ExcelNEW.Options() + + // Binary streams have no extension and are accepted by every format, + // so options are needed to pin ExcelNEW for the InputStream variant. + DataFrame.readFromData(xlsxFile.inputStream(), options) shouldBe expected + + WorkbookFactory.create(xlsxFile.inputStream()).use { wb -> + DataFrame.readFromData(wb, options) shouldBe expected + DataFrame.readFromData(wb.getSheetAt(0), options) shouldBe expected + } + } + + @Test + fun `read XLS in memory`() { + val xlsFile = File("src/test/resources/sample.xls") + val expected = DataFrame.readExcel(xlsFile) + +// WorkbookFactory.create(xlsFile.inputStream()).use { wb -> +// DataFrame.readFromData(wb) shouldBe expected +// } + + val options = ExcelNEW.Options() + + DataFrame.readFromData(xlsFile.inputStream()) shouldBe expected + +// WorkbookFactory.create(xlsFile.inputStream()).use { wb -> +// DataFrame.readFromData(wb) shouldBe expected +// } + } } diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 801fd3a717..e2c5be5832 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -3,12 +3,17 @@ package org.jetbrains.kotlinx.dataframe.io import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams import java.io.File import java.io.InputStream +import java.net.URL +import java.nio.charset.Charset import java.nio.file.Path +import 
kotlin.reflect.KType +import kotlin.reflect.full.isSubtypeOf import kotlin.reflect.typeOf public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat { @@ -33,6 +38,141 @@ public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITE } } +public class Csv : DataFrameReadSource { + + public data class Options( + val delimiter: Char = DelimParams.CSV_DELIMITER, + val header: List = DelimParams.HEADER, + val charset: Charset? = DelimParams.CHARSET, + val colTypes: Map = DelimParams.COL_TYPES, + val skipLines: Long = DelimParams.SKIP_LINES, + val readLines: Long? = DelimParams.READ_LINES, + val parserOptions: ParserOptions? = DelimParams.PARSER_OPTIONS, + val ignoreEmptyLines: Boolean = DelimParams.IGNORE_EMPTY_LINES, + val allowMissingColumns: Boolean = DelimParams.ALLOW_MISSING_COLUMNS, + val ignoreExcessColumns: Boolean = DelimParams.IGNORE_EXCESS_COLUMNS, + val quote: Char = DelimParams.QUOTE, + val ignoreSurroundingSpaces: Boolean = DelimParams.IGNORE_SURROUNDING_SPACES, + val trimInsideQuoted: Boolean = DelimParams.TRIM_INSIDE_QUOTED, + val parseParallel: Boolean = DelimParams.PARSE_PARALLEL, + ) : DataFrameReadOptions + + public companion object { + public val supportedReferenceTypes: Set = + setOf(typeOf(), typeOf(), typeOf(), typeOf()) + public val supportedInMemoryTypes: Set = + setOf(typeOf(), typeOf()) + + internal const val EXTENSION: String = "csv" + internal const val MIME_TYPE: String = "text/csv" + } + + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { + if (options != null && options !is Options) return false + if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false + if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false + + val kType = sourceInfo.type.kType + return when (sourceInfo.type) { + is DataSourceType.Reference -> + supportedReferenceTypes.any { kType.isSubtypeOf(it) } + + is 
DataSourceType.InMemory -> + supportedInMemoryTypes.any { kType.isSubtypeOf(it) } + } + } + + override fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrame<*>? { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.type.kType + return when (sourceInfo.type) { + is DataSourceType.Reference -> { + val url = when { + kType.isSubTypeOf() -> (source as? String)?.let(::asUrl) + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } ?: return null + + DataFrame.readCsv( + url = url, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + is DataSourceType.InMemory -> when { + kType.isSubTypeOf() -> + (source as? InputStream)?.let { stream -> + runCatching { stream.reset() } + DataFrame.readCsv( + inputStream = stream, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + kType.isSubTypeOf() -> + (source as? 
String)?.let { text -> + DataFrame.readCsvStr( + text = text, + delimiter = opts.delimiter, + header = opts.header, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + else -> null + } + } + } + + override val testOrder: Int = 20_000 + + override fun toString(): String = "Csv" +} + +private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) + private const val READ_CSV = "readCsv" internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) : diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index ecb123b93c..be8f7f5e20 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -3,12 +3,17 @@ package org.jetbrains.kotlinx.dataframe.io import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams import java.io.File import java.io.InputStream +import java.net.URL +import java.nio.charset.Charset import java.nio.file.Path +import kotlin.reflect.KType +import kotlin.reflect.full.isSubtypeOf import kotlin.reflect.typeOf public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITER) : SupportedDataFrameFormat { @@ -33,6 +38,141 @@ public class TsvDeephaven(private val 
delimiter: Char = DelimParams.TSV_DELIMITE } } +public class Tsv : DataFrameReadSource { + + public data class Options( + val delimiter: Char = DelimParams.TSV_DELIMITER, + val header: List = DelimParams.HEADER, + val charset: Charset? = DelimParams.CHARSET, + val colTypes: Map = DelimParams.COL_TYPES, + val skipLines: Long = DelimParams.SKIP_LINES, + val readLines: Long? = DelimParams.READ_LINES, + val parserOptions: ParserOptions? = DelimParams.PARSER_OPTIONS, + val ignoreEmptyLines: Boolean = DelimParams.IGNORE_EMPTY_LINES, + val allowMissingColumns: Boolean = DelimParams.ALLOW_MISSING_COLUMNS, + val ignoreExcessColumns: Boolean = DelimParams.IGNORE_EXCESS_COLUMNS, + val quote: Char = DelimParams.QUOTE, + val ignoreSurroundingSpaces: Boolean = DelimParams.IGNORE_SURROUNDING_SPACES, + val trimInsideQuoted: Boolean = DelimParams.TRIM_INSIDE_QUOTED, + val parseParallel: Boolean = DelimParams.PARSE_PARALLEL, + ) : DataFrameReadOptions + + public companion object { + public val supportedReferenceTypes: Set = + setOf(typeOf(), typeOf(), typeOf(), typeOf()) + public val supportedInMemoryTypes: Set = + setOf(typeOf(), typeOf()) + + internal const val EXTENSION: String = "tsv" + internal const val MIME_TYPE: String = "text/tab-separated-values" + } + + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { + if (options != null && options !is Options) return false + if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false + if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false + + val kType = sourceInfo.type.kType + return when (sourceInfo.type) { + is DataSourceType.Reference -> + supportedReferenceTypes.any { kType.isSubtypeOf(it) } + + is DataSourceType.InMemory -> + supportedInMemoryTypes.any { kType.isSubtypeOf(it) } + } + } + + override fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrame<*>? 
{ + val opts = (options ?: Options()) as Options + val kType = sourceInfo.type.kType + return when (sourceInfo.type) { + is DataSourceType.Reference -> { + val url = when { + kType.isSubTypeOf() -> (source as? String)?.let(::asUrl) + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } ?: return null + + DataFrame.readTsv( + url = url, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + is DataSourceType.InMemory -> when { + kType.isSubTypeOf() -> + (source as? InputStream)?.let { stream -> + runCatching { stream.reset() } + DataFrame.readTsv( + inputStream = stream, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + kType.isSubTypeOf() -> + (source as? 
String)?.let { text -> + DataFrame.readTsvStr( + text = text, + delimiter = opts.delimiter, + header = opts.header, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + else -> null + } + } + } + + override val testOrder: Int = 30_000 + + override fun toString(): String = "Tsv" +} + +private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) + private const val READ_TSV = "readTsv" internal class DefaultReadTsvMethod(path: String?, arguments: MethodArguments) : diff --git a/dataframe-csv/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource b/dataframe-csv/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource new file mode 100644 index 0000000000..ebcae7710f --- /dev/null +++ b/dataframe-csv/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource @@ -0,0 +1,2 @@ +org.jetbrains.kotlinx.dataframe.io.Csv +org.jetbrains.kotlinx.dataframe.io.Tsv diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index e76332bfc7..90caeec347 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -45,6 +45,9 @@ import kotlin.io.path.exists import kotlin.io.path.fileSize import kotlin.io.path.inputStream import kotlin.io.path.outputStream +import kotlin.reflect.KType +import kotlin.reflect.full.isSubtypeOf +import kotlin.reflect.typeOf import 
java.time.LocalDate as JavaLocalDate import java.time.LocalDateTime as JavaLocalDateTime import java.util.Date as JavaDate @@ -64,6 +67,138 @@ public class Excel : SupportedDataFrameFormat { DefaultReadExcelMethod(pathRepresentation) } +public class ExcelNEW : DataFrameReadSource { + + public data class Options( + val sheetName: String? = null, + val skipRows: Int = 0, + val columns: String? = null, + val stringColumns: StringColumns? = null, + val rowsCount: Int? = null, + val nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE, + val firstRowIsHeader: Boolean = true, + val parseEmptyAsNull: Boolean = true, + ) : DataFrameReadOptions + + public companion object { + public val supportedReferenceTypes: Set = + setOf(typeOf(), typeOf(), typeOf(), typeOf()) + public val supportedInMemoryTypes: Set = + setOf(typeOf(), typeOf(), typeOf()) + + internal val EXTENSIONS: Set = setOf("xls", "xlsx") + internal val MIME_TYPES: Set = setOf( + "application/vnd.ms-excel", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ) + } + + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { + if (options != null && options !is Options) return false + val ext = sourceInfo.extension?.lowercase() + if (ext != null && ext !in EXTENSIONS) return false + val mime = sourceInfo.mimeType?.lowercase() + if (mime != null && mime !in MIME_TYPES) return false + + val kType = sourceInfo.type.kType + return when (sourceInfo.type) { + is DataSourceType.Reference -> + supportedReferenceTypes.any { kType.isSubtypeOf(it) } + + is DataSourceType.InMemory -> + supportedInMemoryTypes.any { kType.isSubtypeOf(it) } + } + } + + override fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrame<*>? 
{ + val opts = (options ?: Options()) as Options + val kType = sourceInfo.type.kType + return when (sourceInfo.type) { + is DataSourceType.Reference -> { + val url = when { + kType.isSubTypeOf() -> (source as? String)?.let(::asUrl) + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } ?: return null + + DataFrame.readExcel( + url = url, + sheetName = opts.sheetName, + skipRows = opts.skipRows, + columns = opts.columns, + stringColumns = opts.stringColumns, + rowsCount = opts.rowsCount, + nameRepairStrategy = opts.nameRepairStrategy, + firstRowIsHeader = opts.firstRowIsHeader, + parseEmptyAsNull = opts.parseEmptyAsNull, + ) + } + + is DataSourceType.InMemory -> when { + kType.isSubTypeOf() -> + (source as? InputStream)?.let { stream -> + runCatching { stream.reset() } + DataFrame.readExcel( + inputStream = stream, + sheetName = opts.sheetName, + skipRows = opts.skipRows, + columns = opts.columns, + stringColumns = opts.stringColumns, + rowsCount = opts.rowsCount, + nameRepairStrategy = opts.nameRepairStrategy, + firstRowIsHeader = opts.firstRowIsHeader, + parseEmptyAsNull = opts.parseEmptyAsNull, + ) + } + + kType.isSubTypeOf() -> + (source as? Workbook)?.let { wb -> + DataFrame.readExcel( + wb = wb, + sheetName = opts.sheetName, + skipRows = opts.skipRows, + columns = opts.columns, + formattingOptions = opts.stringColumns?.toFormattingOptions(), + rowsCount = opts.rowsCount, + nameRepairStrategy = opts.nameRepairStrategy, + firstRowIsHeader = opts.firstRowIsHeader, + parseEmptyAsNull = opts.parseEmptyAsNull, + ) + } + + kType.isSubTypeOf() -> + (source as? Sheet)?.let { sheet -> + // readExcel(Sheet) has no sheetName parameter — the sheet is already selected. 
+ DataFrame.readExcel( + sheet = sheet, + columns = opts.columns, + formattingOptions = opts.stringColumns?.toFormattingOptions(), + skipRows = opts.skipRows, + rowsCount = opts.rowsCount, + nameRepairStrategy = opts.nameRepairStrategy, + firstRowIsHeader = opts.firstRowIsHeader, + parseEmptyAsNull = opts.parseEmptyAsNull, + ) + } + + else -> null + } + } + } + + override val testOrder: Int = 40_000 + + override fun toString(): String = "Xlsx" +} + +private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) + private const val MESSAGE_REMOVE_1_1 = "Will be removed in 1.1." internal const val READ_EXCEL_OLD = "This function is only here for binary compatibility. $MESSAGE_REMOVE_1_1" diff --git a/dataframe-excel/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource b/dataframe-excel/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource new file mode 100644 index 0000000000..9ceeadf039 --- /dev/null +++ b/dataframe-excel/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource @@ -0,0 +1 @@ +org.jetbrains.kotlinx.dataframe.io.ExcelNEW diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 9786d4f8f2..3cf874a5a7 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -98,6 +98,7 @@ public class Json : DataFrameReadSource { val element = when { kType.isSubTypeOf() -> (source as? 
InputStream)?.let { + runCatching { it.reset() } Json.decodeFromStream(it) } From 783d1dc93ac9de8d88f1539a7c767ddd9aa60e08 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 14 May 2026 14:43:26 +0200 Subject: [PATCH 03/20] PoC for DataFrameReadSource with jdbc --- core/build.gradle.kts | 1 + .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 91 ++++++++++-- .../jetbrains/kotlinx/dataframe/io/Jdbc.kt | 133 ++++++++++++++++++ 3 files changed, 217 insertions(+), 8 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 3a4e0cf0f1..a6a2fc3115 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -89,6 +89,7 @@ dependencies { testImplementation(projects.dataframeExcel) // testImplementation(projects.dataframeGeo) testImplementation(projects.dataframeJdbc) + testImplementation(libs.h2db) } // Configure testJava16 dependencies to extend from test diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index f9bef49023..8728512a8b 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -5,8 +5,12 @@ import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonElement import org.apache.poi.ss.usermodel.WorkbookFactory import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.io.db.H2 import org.junit.Test import java.io.File +import java.sql.Connection +import java.sql.DriverManager +import javax.sql.DataSource import kotlin.io.path.Path import kotlin.io.path.absolute @@ -188,16 +192,87 @@ class Guess2 { val xlsFile = File("src/test/resources/sample.xls") val expected = DataFrame.readExcel(xlsFile) -// WorkbookFactory.create(xlsFile.inputStream()).use { wb -> -// DataFrame.readFromData(wb) shouldBe expected -// } + WorkbookFactory.create(xlsFile.inputStream()).use { wb -> + DataFrame.readFromData(wb) 
shouldBe expected + } + DataFrame.readFromData(xlsFile.inputStream()) shouldBe expected + WorkbookFactory.create(xlsFile.inputStream()).use { wb -> + DataFrame.readFromData(wb) shouldBe expected + } + } - val options = ExcelNEW.Options() + private fun h2Url(name: String) = "jdbc:h2:mem:$name;DB_CLOSE_DELAY=-1" - DataFrame.readFromData(xlsFile.inputStream()) shouldBe expected + private fun seed(connection: Connection) { + connection.createStatement().use { st -> + st.execute("CREATE TABLE Customer (id INT, name VARCHAR(255), age INT)") + st.execute("INSERT INTO Customer (id, name, age) VALUES (1, 'John', 40), (2, 'Alice', 25), (3, 'Bob', 47)") + } + } + + @Test + fun `read JDBC in memory`() { + val url = h2Url("guess2_inmem") + DriverManager.getConnection(url).use { conn -> + seed(conn) + + val expected = DataFrame.readSqlTable(conn, "Customer") + val tableOpts = Jdbc2.Options(sqlQueryOrTableName = "Customer") + val queryOpts = Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer") + + // Connection — exclusive type, but query/table name must come from options. + DataFrame.readFromData(conn, tableOpts) shouldBe expected + DataFrame.readFromData(conn, queryOpts) shouldBe expected + + // DbConnectionConfig as InMemory. + val config = DbConnectionConfig(url = url) + DataFrame.readFromData(config, tableOpts) shouldBe expected + DataFrame.readFromData(config, queryOpts) shouldBe expected + + // DataSource — opens a fresh connection each call (DataSource.readDataFrame closes it via `use`). + val dataSource = object : DataSource { + override fun getConnection() = DriverManager.getConnection(url) + override fun getConnection(u: String?, p: String?) = DriverManager.getConnection(url) + override fun getLogWriter() = null + override fun setLogWriter(out: java.io.PrintWriter?) 
{} + override fun setLoginTimeout(seconds: Int) {} + override fun getLoginTimeout() = 0 + override fun getParentLogger() = throw UnsupportedOperationException() + override fun unwrap(iface: Class?): T = throw UnsupportedOperationException() + override fun isWrapperFor(iface: Class<*>?) = false + } + DataFrame.readFromData(dataSource, tableOpts) shouldBe expected + + // ResultSet — no sqlQueryOrTableName needed; just dbType (or a Connection to derive it). + conn.prepareStatement("SELECT * FROM Customer").use { ps -> + ps.executeQuery().use { rs -> + DataFrame.readFromData( + rs, + Jdbc2.Options(dbType = H2()), + ) shouldBe expected + } + } + conn.prepareStatement("SELECT * FROM Customer").use { ps -> + ps.executeQuery().use { rs -> + DataFrame.readFromData( + rs, + Jdbc2.Options(resultSetConnection = conn), + ) shouldBe expected + } + } + } + } + + @Test + fun `read JDBC reference`() { + val url = h2Url("guess2_ref") + DriverManager.getConnection(url).use { conn -> seed(conn) } + + val config = DbConnectionConfig(url = url) + val expected = DataFrame.readSqlTable(config, "Customer") + val tableOpts = Jdbc2.Options(sqlQueryOrTableName = "Customer") -// WorkbookFactory.create(xlsFile.inputStream()).use { wb -> -// DataFrame.readFromData(wb) shouldBe expected -// } + DataFrame.readReference(config, tableOpts) shouldBe expected + DataFrame.readReference(config, Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer")) shouldBe expected } } diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt index a5307b96d9..0372c06d8d 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt @@ -5,9 +5,17 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod import 
org.jetbrains.kotlinx.dataframe.codeGen.Code import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod +import org.jetbrains.kotlinx.dataframe.io.db.DbType import java.io.File import java.io.InputStream import java.nio.file.Path +import java.sql.Connection +import java.sql.PreparedStatement +import java.sql.ResultSet +import javax.sql.DataSource +import kotlin.reflect.KType +import kotlin.reflect.full.isSubtypeOf +import kotlin.reflect.typeOf // TODO: https://github.com/Kotlin/dataframe/issues/450 public class Jdbc : @@ -36,6 +44,131 @@ public class Jdbc : DefaultReadJdbcMethod(pathRepresentation) } +/** + * [DataFrameReadSource] for JDBC. + * + * Reading from JDBC always needs a "what" (a SQL query or table name) — unlike a file, a [Connection] doesn't + * carry that instruction. Provide it via [Options.sqlQueryOrTableName]. The only exception is [ResultSet], + * which is already an executed query. + * + * Supports the following sources: + * - [Reference][DataSourceType.Reference]: [DbConnectionConfig] + * - [InMemory][DataSourceType.InMemory]: [Connection], [DataSource], [DbConnectionConfig], [ResultSet] + * + * Note: [DbConnectionConfig] is accepted as both reference and in-memory deliberately, to compare which + * feels more natural in practice. Other read-paths in this module — notably `readAllSqlTables` returning a + * `Map` — don't fit the single-DataFrame contract and are unchanged. + */ +public class Jdbc2 : DataFrameReadSource { + + public data class Options( + /** + * SQL query (e.g. `"SELECT * FROM users"`) or table name (e.g. `"users"`). + * Required for [Connection], [DataSource], and [DbConnectionConfig] sources. + * Ignored for [ResultSet] (it's already an executed query). + */ + val sqlQueryOrTableName: String? = null, + val limit: Int? = null, + val inferNullability: Boolean = true, + /** Optional, auto-detected from the source when `null`. */ + val dbType: DbType? 
= null, + val strictValidation: Boolean = true, + val configureStatement: (PreparedStatement) -> Unit = {}, + /** + * Only used when the source is a [ResultSet] and [dbType] is `null`; provides a [Connection] + * to auto-detect the database type. Ignored otherwise. + */ + val resultSetConnection: Connection? = null, + ) : DataFrameReadOptions + + public companion object { + public val supportedReferenceTypes: Set = setOf(typeOf()) + public val supportedInMemoryTypes: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) + } + + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { + if (options != null && options !is Options) return false + val kType = sourceInfo.type.kType + return when (sourceInfo.type) { + is DataSourceType.Reference -> + supportedReferenceTypes.any { kType.isSubtypeOf(it) } + + is DataSourceType.InMemory -> + supportedInMemoryTypes.any { kType.isSubtypeOf(it) } + } + } + + override fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrame<*>? { + val opts = (options ?: Options()) as Options + return when (source) { + is ResultSet -> when { + opts.dbType != null -> + DataFrame.readResultSet(source, opts.dbType, opts.limit, opts.inferNullability) + + opts.resultSetConnection != null -> + DataFrame.readResultSet( + source, + opts.resultSetConnection, + opts.limit, + opts.inferNullability, + ) + + // Without dbType or a connection we can't read a ResultSet — fall through. 
+ else -> null + } + + is Connection -> opts.sqlQueryOrTableName?.let { + source.readDataFrame( + sqlQueryOrTableName = it, + limit = opts.limit, + inferNullability = opts.inferNullability, + dbType = opts.dbType, + strictValidation = opts.strictValidation, + configureStatement = opts.configureStatement, + ) + } + + is DataSource -> opts.sqlQueryOrTableName?.let { + source.readDataFrame( + sqlQueryOrTableName = it, + limit = opts.limit, + inferNullability = opts.inferNullability, + dbType = opts.dbType, + strictValidation = opts.strictValidation, + configureStatement = opts.configureStatement, + ) + } + + is DbConnectionConfig -> opts.sqlQueryOrTableName?.let { + source.readDataFrame( + sqlQueryOrTableName = it, + limit = opts.limit, + inferNullability = opts.inferNullability, + dbType = opts.dbType, + strictValidation = opts.strictValidation, + configureStatement = opts.configureStatement, + ) + } + + else -> null + } + } + + override val testOrder: Int = 50_000 + + override fun toString(): String = "Jdbc" +} + private fun DataFrame.Companion.readJDBC(stream: File): DataFrame<*> { TODO("Not yet implemented") } From 08179ceff8ce17c224ea0d11e4a27fcd92637e1a Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 14 May 2026 15:23:58 +0200 Subject: [PATCH 04/20] Refactored `readReference` and `readFromData` to `readSource` across test and production code for improved API unification and flexibility. 
--- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 118 +++++++------ .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 148 +++++++++------- .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 162 ++++++++---------- .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 162 ++++++++---------- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 158 ++++++++--------- .../jetbrains/kotlinx/dataframe/io/Jdbc.kt | 21 +-- .../jetbrains/kotlinx/dataframe/io/json.kt | 103 +++++------ 7 files changed, 427 insertions(+), 445 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 5192ae5017..3c7eff0c30 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -2,10 +2,12 @@ package org.jetbrains.kotlinx.dataframe.io import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.io.readSource import java.io.ByteArrayInputStream import java.io.File import java.io.FileNotFoundException import java.io.InputStream +import java.net.URI import java.net.URL import java.nio.file.Path import java.util.ServiceLoader @@ -31,30 +33,22 @@ public interface DataFrameReadSource { public val testOrder: Int } +/** + * Description of a source passed to [DataFrameReadSource]. Carries the static [kType] of the value and + * optional [extension]/[mimeType] hints, both of which may be `null` when the source is in-memory content + * with no reasonable file-extension/MIME interpretation (e.g., a raw [String], [InputStream], [java.sql.Connection], + * etc.). + */ public data class DataSourceInfo( - public val type: DataSourceType, + public val kType: KType, public val extension: String? = null, // TODO, Apache Tika? public val mimeType: String? 
= null, ) -public sealed class DataSourceType(public open val kType: KType) { - /** Like a path, file, or URL. */ - public data class Reference(override val kType: KType) : DataSourceType(kType) - - /** Actual data, like a String, ByteArray, InputStream */ - public data class InMemory(override val kType: KType) : DataSourceType(kType) - - public companion object { - public inline fun reference(): Reference = Reference(kType = typeOf()) - - public inline fun inMemory(): InMemory = InMemory(kType = typeOf()) - } -} - /** * NOTE: Needs to have fully qualified name in - * resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.NewSupportedDataFrameFormat + * resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource * to be detected here. */ internal val newSupportedFormats: List by lazy { @@ -70,6 +64,20 @@ internal fun readDataFrameImpl( options: DataFrameReadOptions? = null, formats: List = newSupportedFormats, ): AnyFrame { + if (source is String) { + val url = asUrlOrNull(source) + if (url != null) { + return readDataFrameImpl( + source = url, + sourceInfo = sourceInfo.copy( + kType = typeOf(), + ), + options = options, + formats = formats, + ) + } + } + // Some sources can only be read once, like InputStreams, so we need to buffer them var bufferedSource: Any? = null @@ -98,30 +106,33 @@ internal fun readDataFrameImpl( throw IllegalArgumentException("Unknown DataFrame source $source, $sourceInfo; Tried $tries") } -public fun DataFrame.Companion.readReference( - reference: Any, - type: KType, - options: DataFrameReadOptions? = null, -): AnyFrame = +/** + * Unified entry point for the [DataFrameReadSource] framework: passes [source] through every registered + * format until one reads it. + * + * For a [String] that points to an existing file or a recognized URL (`http://`, `https://`, `ftp://`), + * the source is normalized to a [URL] so the file-extension hint can be used to disambiguate formats. 
Any + * other [String] is treated as in-memory content (raw JSON/CSV/etc.). + * + * Named [readSource] rather than `read` to avoid shadowing the legacy `DataFrame.read(File/URL/Path/String, header)` + * entries in `guess.kt` that use the older [SupportedDataFrameFormat] system. Once the legacy entries are + * retired, this can be renamed to `read`. + */ +public fun DataFrame.Companion.readSource(source: Any, type: KType, options: DataFrameReadOptions? = null): AnyFrame = readDataFrameImpl( - source = reference, + source = source, sourceInfo = DataSourceInfo( - type = DataSourceType.Reference(type.withNullability(false)), - extension = reference.extensionOrNull(), + kType = type.withNullability(false), + extension = source.extensionOrNull(), mimeType = null, // TODO, Apache Tika? ), options = options, ) -public inline fun DataFrame.Companion.readReference( - reference: R, +public inline fun DataFrame.Companion.readSource( + source: R, options: DataFrameReadOptions? = null, -): AnyFrame = - readReference( - reference = reference, - type = typeOf(), - options = options, - ) +): AnyFrame = readSource(source = source, type = typeOf(), options = options) internal fun Any.extensionOrNull(): String? = when (this) { @@ -140,22 +151,29 @@ internal fun Any.extensionOrNull(): String? = else -> null } -public fun DataFrame.Companion.readFromData(data: Any, type: KType, options: DataFrameReadOptions? = null): AnyFrame = - readDataFrameImpl( - source = data, - sourceInfo = DataSourceInfo( - type = DataSourceType.InMemory(type.withNullability(false)), - mimeType = null, // TODO, Apache Tika? - ), - options = options, - ) +/** + * Non-throwing variant of [asUrl]: returns the [URL] iff [string] is a recognized URL (`http`/`https`/`ftp`) + * or an existing file path. Used by [readSource] to decide whether a [String] should be treated as a reference + * or as raw content. + */ +internal fun asUrlOrNull(string: String): URL? 
= + when { + isUrl(string) -> try { + URI(string).toURL() + } catch (_: Exception) { + null + } -public inline fun DataFrame.Companion.readFromData( - data: R, - options: DataFrameReadOptions? = null, -): AnyFrame = - readFromData( - data = data, - type = typeOf(), - options = options, - ) + else -> { + val file = try { + File(string) + } catch (_: Exception) { + null + } + if (file != null && file.exists() && file.isFile) { + file.toURI().toURL() + } else { + null + } + } + } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index 8728512a8b..086796316e 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -20,10 +20,10 @@ class Guess2 { fun `read JSON reference`() { val expected = DataFrame.readJson("../data/participants.json") - DataFrame.readReference("../data/participants.json") shouldBe expected - DataFrame.readReference(Path("../data/participants.json")) shouldBe expected - DataFrame.readReference(File("../data/participants.json")) shouldBe expected - DataFrame.readReference( + DataFrame.readSource("../data/participants.json") shouldBe expected + DataFrame.readSource(Path("../data/participants.json")) shouldBe expected + DataFrame.readSource(File("../data/participants.json")) shouldBe expected + DataFrame.readSource( Path("../data/participants.json").absolute().normalize().toUri().toURL(), ) shouldBe expected @@ -31,10 +31,10 @@ class Guess2 { typeClashTactic = JSON.TypeClashTactic.ANY_COLUMNS, ) - DataFrame.readReference("../data/participants.json", options) shouldBe expected - DataFrame.readReference(Path("../data/participants.json"), options) shouldBe expected - DataFrame.readReference(File("../data/participants.json"), options) shouldBe expected - DataFrame.readReference( + DataFrame.readSource("../data/participants.json", options) shouldBe expected + 
DataFrame.readSource(Path("../data/participants.json"), options) shouldBe expected + DataFrame.readSource(File("../data/participants.json"), options) shouldBe expected + DataFrame.readSource( Path("../data/participants.json").absolute().normalize().toUri().toURL(), options, ) shouldBe expected @@ -46,17 +46,17 @@ class Guess2 { val file = File("../data/participants.json") - DataFrame.readFromData(file.readText()) shouldBe expected - DataFrame.readFromData(file.inputStream()) shouldBe expected - DataFrame.readFromData(Json.decodeFromString(file.readText())) shouldBe expected + DataFrame.readSource(file.readText()) shouldBe expected + DataFrame.readSource(file.inputStream()) shouldBe expected + DataFrame.readSource(Json.decodeFromString(file.readText())) shouldBe expected val options = org.jetbrains.kotlinx.dataframe.io.Json.Options( typeClashTactic = JSON.TypeClashTactic.ANY_COLUMNS, ) - DataFrame.readFromData(file.readText(), options) shouldBe expected - DataFrame.readFromData(file.inputStream(), options) shouldBe expected - DataFrame.readFromData(Json.decodeFromString(file.readText()), options) shouldBe expected + DataFrame.readSource(file.readText(), options) shouldBe expected + DataFrame.readSource(file.inputStream(), options) shouldBe expected + DataFrame.readSource(Json.decodeFromString(file.readText()), options) shouldBe expected } @Test @@ -64,19 +64,19 @@ class Guess2 { val csvPath = "../data/movies.csv" val expected = DataFrame.readCsv(csvPath) - DataFrame.readReference(csvPath) shouldBe expected - DataFrame.readReference(Path(csvPath)) shouldBe expected - DataFrame.readReference(File(csvPath)) shouldBe expected - DataFrame.readReference( + DataFrame.readSource(csvPath) shouldBe expected + DataFrame.readSource(Path(csvPath)) shouldBe expected + DataFrame.readSource(File(csvPath)) shouldBe expected + DataFrame.readSource( Path(csvPath).absolute().normalize().toUri().toURL(), ) shouldBe expected val options = Csv.Options(delimiter = ',') - 
DataFrame.readReference(csvPath, options) shouldBe expected - DataFrame.readReference(Path(csvPath), options) shouldBe expected - DataFrame.readReference(File(csvPath), options) shouldBe expected - DataFrame.readReference( + DataFrame.readSource(csvPath, options) shouldBe expected + DataFrame.readSource(Path(csvPath), options) shouldBe expected + DataFrame.readSource(File(csvPath), options) shouldBe expected + DataFrame.readSource( Path(csvPath).absolute().normalize().toUri().toURL(), options, ) shouldBe expected @@ -90,8 +90,8 @@ class Guess2 { // String content has no extension hint, so we pin the format via options. val options = Csv.Options(delimiter = ',') - DataFrame.readFromData(file.readText(), options) shouldBe expected - DataFrame.readFromData(file.inputStream(), options) shouldBe expected + DataFrame.readSource(file.readText(), options) shouldBe expected + DataFrame.readSource(file.inputStream(), options) shouldBe expected } @Test @@ -99,19 +99,19 @@ class Guess2 { val tsvFile = File("src/test/resources/abc.tsv") val expected = DataFrame.readTsv(tsvFile) - DataFrame.readReference(tsvFile.path) shouldBe expected - DataFrame.readReference(Path(tsvFile.path)) shouldBe expected - DataFrame.readReference(tsvFile) shouldBe expected - DataFrame.readReference( + DataFrame.readSource(tsvFile.path) shouldBe expected + DataFrame.readSource(Path(tsvFile.path)) shouldBe expected + DataFrame.readSource(tsvFile) shouldBe expected + DataFrame.readSource( Path(tsvFile.path).absolute().normalize().toUri().toURL(), ) shouldBe expected val options = Tsv.Options(delimiter = '\t') - DataFrame.readReference(tsvFile.path, options) shouldBe expected - DataFrame.readReference(Path(tsvFile.path), options) shouldBe expected - DataFrame.readReference(tsvFile, options) shouldBe expected - DataFrame.readReference( + DataFrame.readSource(tsvFile.path, options) shouldBe expected + DataFrame.readSource(Path(tsvFile.path), options) shouldBe expected + DataFrame.readSource(tsvFile, 
options) shouldBe expected + DataFrame.readSource( Path(tsvFile.path).absolute().normalize().toUri().toURL(), options, ) shouldBe expected @@ -124,8 +124,8 @@ class Guess2 { val options = Tsv.Options(delimiter = '\t') // Binary/text without extension — options pin Tsv over Csv/Json/Xlsx. - DataFrame.readFromData(tsvFile.readText(), options) shouldBe expected - DataFrame.readFromData(tsvFile.inputStream(), options) shouldBe expected + DataFrame.readSource(tsvFile.readText(), options) shouldBe expected + DataFrame.readSource(tsvFile.inputStream(), options) shouldBe expected } @Test @@ -133,19 +133,19 @@ class Guess2 { val xlsxFile = File("src/test/resources/sample2.xlsx") val expected = DataFrame.readExcel(xlsxFile) - DataFrame.readReference(xlsxFile.path) shouldBe expected - DataFrame.readReference(Path(xlsxFile.path)) shouldBe expected - DataFrame.readReference(xlsxFile) shouldBe expected - DataFrame.readReference( + DataFrame.readSource(xlsxFile.path) shouldBe expected + DataFrame.readSource(Path(xlsxFile.path)) shouldBe expected + DataFrame.readSource(xlsxFile) shouldBe expected + DataFrame.readSource( Path(xlsxFile.path).absolute().normalize().toUri().toURL(), ) shouldBe expected val options = ExcelNEW.Options(sheetName = "Sheet1") - DataFrame.readReference(xlsxFile.path, options) shouldBe expected - DataFrame.readReference(Path(xlsxFile.path), options) shouldBe expected - DataFrame.readReference(xlsxFile, options) shouldBe expected - DataFrame.readReference( + DataFrame.readSource(xlsxFile.path, options) shouldBe expected + DataFrame.readSource(Path(xlsxFile.path), options) shouldBe expected + DataFrame.readSource(xlsxFile, options) shouldBe expected + DataFrame.readSource( Path(xlsxFile.path).absolute().normalize().toUri().toURL(), options, ) shouldBe expected @@ -156,10 +156,10 @@ class Guess2 { val xlsFile = File("src/test/resources/sample.xls") val expected = DataFrame.readExcel(xlsFile) - DataFrame.readReference(xlsFile.path) shouldBe expected - 
DataFrame.readReference(Path(xlsFile.path)) shouldBe expected - DataFrame.readReference(xlsFile) shouldBe expected - DataFrame.readReference( + DataFrame.readSource(xlsFile.path) shouldBe expected + DataFrame.readSource(Path(xlsFile.path)) shouldBe expected + DataFrame.readSource(xlsFile) shouldBe expected + DataFrame.readSource( Path(xlsFile.path).absolute().normalize().toUri().toURL(), ) shouldBe expected } @@ -171,19 +171,19 @@ class Guess2 { // Workbook and Sheet are exclusive to ExcelNEW, so type-based dispatch works without options. WorkbookFactory.create(xlsxFile.inputStream()).use { wb -> - DataFrame.readFromData(wb) shouldBe expected - DataFrame.readFromData(wb.getSheetAt(0)) shouldBe expected + DataFrame.readSource(wb) shouldBe expected + DataFrame.readSource(wb.getSheetAt(0)) shouldBe expected } val options = ExcelNEW.Options() // Binary streams have no extension and are accepted by every format, // so options are needed to pin ExcelNEW for the InputStream variant. - DataFrame.readFromData(xlsxFile.inputStream(), options) shouldBe expected + DataFrame.readSource(xlsxFile.inputStream(), options) shouldBe expected WorkbookFactory.create(xlsxFile.inputStream()).use { wb -> - DataFrame.readFromData(wb, options) shouldBe expected - DataFrame.readFromData(wb.getSheetAt(0), options) shouldBe expected + DataFrame.readSource(wb, options) shouldBe expected + DataFrame.readSource(wb.getSheetAt(0), options) shouldBe expected } } @@ -193,11 +193,11 @@ class Guess2 { val expected = DataFrame.readExcel(xlsFile) WorkbookFactory.create(xlsFile.inputStream()).use { wb -> - DataFrame.readFromData(wb) shouldBe expected + DataFrame.readSource(wb) shouldBe expected } - DataFrame.readFromData(xlsFile.inputStream()) shouldBe expected + DataFrame.readSource(xlsFile.inputStream()) shouldBe expected WorkbookFactory.create(xlsFile.inputStream()).use { wb -> - DataFrame.readFromData(wb) shouldBe expected + DataFrame.readSource(wb) shouldBe expected } } @@ -221,13 +221,13 @@ class 
Guess2 { val queryOpts = Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer") // Connection — exclusive type, but query/table name must come from options. - DataFrame.readFromData(conn, tableOpts) shouldBe expected - DataFrame.readFromData(conn, queryOpts) shouldBe expected + DataFrame.readSource(conn, tableOpts) shouldBe expected + DataFrame.readSource(conn, queryOpts) shouldBe expected // DbConnectionConfig as InMemory. val config = DbConnectionConfig(url = url) - DataFrame.readFromData(config, tableOpts) shouldBe expected - DataFrame.readFromData(config, queryOpts) shouldBe expected + DataFrame.readSource(config, tableOpts) shouldBe expected + DataFrame.readSource(config, queryOpts) shouldBe expected // DataSource — opens a fresh connection each call (DataSource.readDataFrame closes it via `use`). val dataSource = object : DataSource { @@ -241,12 +241,12 @@ class Guess2 { override fun unwrap(iface: Class?): T = throw UnsupportedOperationException() override fun isWrapperFor(iface: Class<*>?) = false } - DataFrame.readFromData(dataSource, tableOpts) shouldBe expected + DataFrame.readSource(dataSource, tableOpts) shouldBe expected // ResultSet — no sqlQueryOrTableName needed; just dbType (or a Connection to derive it). 
conn.prepareStatement("SELECT * FROM Customer").use { ps -> ps.executeQuery().use { rs -> - DataFrame.readFromData( + DataFrame.readSource( rs, Jdbc2.Options(dbType = H2()), ) shouldBe expected @@ -254,7 +254,7 @@ class Guess2 { } conn.prepareStatement("SELECT * FROM Customer").use { ps -> ps.executeQuery().use { rs -> - DataFrame.readFromData( + DataFrame.readSource( rs, Jdbc2.Options(resultSetConnection = conn), ) shouldBe expected @@ -263,6 +263,28 @@ class Guess2 { } } + @Test + fun `unified readSource auto-detects references vs content`() { + // String that points to an existing file → routed through URL → JSON wins on extension + val jsonExpected = DataFrame.readJson("../data/participants.json") + DataFrame.readSource("../data/participants.json") shouldBe jsonExpected + + // Same idea for CSV/XLSX + val csvExpected = DataFrame.readCsv("../data/movies.csv") + DataFrame.readSource("../data/movies.csv") shouldBe csvExpected + + val xlsxExpected = DataFrame.readExcel(File("src/test/resources/sample2.xlsx")) + DataFrame.readSource("src/test/resources/sample2.xlsx") shouldBe xlsxExpected + + // String that doesn't resolve to a file → treated as raw content (JSON content here) + val file = File("../data/participants.json") + DataFrame.readSource(file.readText()) shouldBe jsonExpected + + // Non-String types: still work, no special handling needed + DataFrame.readSource(file) shouldBe jsonExpected + DataFrame.readSource(Path("../data/participants.json")) shouldBe jsonExpected + } + @Test fun `read JDBC reference`() { val url = h2Url("guess2_ref") @@ -272,7 +294,7 @@ class Guess2 { val expected = DataFrame.readSqlTable(config, "Customer") val tableOpts = Jdbc2.Options(sqlQueryOrTableName = "Customer") - DataFrame.readReference(config, tableOpts) shouldBe expected - DataFrame.readReference(config, Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer")) shouldBe expected + DataFrame.readSource(config, tableOpts) shouldBe expected + DataFrame.readSource(config, 
Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer")) shouldBe expected } } diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index e2c5be5832..b168062a49 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -58,10 +58,8 @@ public class Csv : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedReferenceTypes: Set = - setOf(typeOf(), typeOf(), typeOf(), typeOf()) - public val supportedInMemoryTypes: Set = - setOf(typeOf(), typeOf()) + public val supportedTypes: Set = + setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) internal const val EXTENSION: String = "csv" internal const val MIME_TYPE: String = "text/csv" @@ -71,15 +69,7 @@ public class Csv : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false - - val kType = sourceInfo.type.kType - return when (sourceInfo.type) { - is DataSourceType.Reference -> - supportedReferenceTypes.any { kType.isSubtypeOf(it) } - - is DataSourceType.InMemory -> - supportedInMemoryTypes.any { kType.isSubtypeOf(it) } - } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( @@ -88,81 +78,77 @@ public class Csv : DataFrameReadSource { options: DataFrameReadOptions?, ): DataFrame<*>? { val opts = (options ?: Options()) as Options - val kType = sourceInfo.type.kType - return when (sourceInfo.type) { - is DataSourceType.Reference -> { - val url = when { - kType.isSubTypeOf() -> (source as? String)?.let(::asUrl) - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? 
Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() - else -> null - } ?: return null - - DataFrame.readCsv( - url = url, - delimiter = opts.delimiter, - header = opts.header, - charset = opts.charset, - colTypes = opts.colTypes, - skipLines = opts.skipLines, - readLines = opts.readLines, - parserOptions = opts.parserOptions, - ignoreEmptyLines = opts.ignoreEmptyLines, - allowMissingColumns = opts.allowMissingColumns, - ignoreExcessColumns = opts.ignoreExcessColumns, - quote = opts.quote, - ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, - trimInsideQuoted = opts.trimInsideQuoted, - parseParallel = opts.parseParallel, - ) - } - - is DataSourceType.InMemory -> when { - kType.isSubTypeOf() -> - (source as? InputStream)?.let { stream -> - runCatching { stream.reset() } - DataFrame.readCsv( - inputStream = stream, - delimiter = opts.delimiter, - header = opts.header, - charset = opts.charset, - colTypes = opts.colTypes, - skipLines = opts.skipLines, - readLines = opts.readLines, - parserOptions = opts.parserOptions, - ignoreEmptyLines = opts.ignoreEmptyLines, - allowMissingColumns = opts.allowMissingColumns, - ignoreExcessColumns = opts.ignoreExcessColumns, - quote = opts.quote, - ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, - trimInsideQuoted = opts.trimInsideQuoted, - parseParallel = opts.parseParallel, - ) - } - - kType.isSubTypeOf() -> - (source as? 
String)?.let { text -> - DataFrame.readCsvStr( - text = text, - delimiter = opts.delimiter, - header = opts.header, - colTypes = opts.colTypes, - skipLines = opts.skipLines, - readLines = opts.readLines, - parserOptions = opts.parserOptions, - ignoreEmptyLines = opts.ignoreEmptyLines, - allowMissingColumns = opts.allowMissingColumns, - ignoreExcessColumns = opts.ignoreExcessColumns, - quote = opts.quote, - ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, - trimInsideQuoted = opts.trimInsideQuoted, - parseParallel = opts.parseParallel, - ) - } - - else -> null - } + val kType = sourceInfo.kType + + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return DataFrame.readCsv( + url = url, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + return when { + kType.isSubTypeOf() -> + (source as? 
InputStream)?.let { stream -> + DataFrame.readCsv( + inputStream = stream, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + kType.isSubTypeOf() -> + (source as? String)?.let { text -> + DataFrame.readCsvStr( + text = text, + delimiter = opts.delimiter, + header = opts.header, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + else -> null } } diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index be8f7f5e20..fa605e43a0 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -58,10 +58,8 @@ public class Tsv : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedReferenceTypes: Set = - setOf(typeOf(), typeOf(), typeOf(), typeOf()) - public val supportedInMemoryTypes: Set = - setOf(typeOf(), typeOf()) + public val supportedTypes: Set = + setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) internal const val EXTENSION: String = "tsv" internal const val 
MIME_TYPE: String = "text/tab-separated-values" @@ -71,15 +69,7 @@ public class Tsv : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false - - val kType = sourceInfo.type.kType - return when (sourceInfo.type) { - is DataSourceType.Reference -> - supportedReferenceTypes.any { kType.isSubtypeOf(it) } - - is DataSourceType.InMemory -> - supportedInMemoryTypes.any { kType.isSubtypeOf(it) } - } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( @@ -88,81 +78,77 @@ public class Tsv : DataFrameReadSource { options: DataFrameReadOptions?, ): DataFrame<*>? { val opts = (options ?: Options()) as Options - val kType = sourceInfo.type.kType - return when (sourceInfo.type) { - is DataSourceType.Reference -> { - val url = when { - kType.isSubTypeOf() -> (source as? String)?.let(::asUrl) - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() - else -> null - } ?: return null - - DataFrame.readTsv( - url = url, - delimiter = opts.delimiter, - header = opts.header, - charset = opts.charset, - colTypes = opts.colTypes, - skipLines = opts.skipLines, - readLines = opts.readLines, - parserOptions = opts.parserOptions, - ignoreEmptyLines = opts.ignoreEmptyLines, - allowMissingColumns = opts.allowMissingColumns, - ignoreExcessColumns = opts.ignoreExcessColumns, - quote = opts.quote, - ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, - trimInsideQuoted = opts.trimInsideQuoted, - parseParallel = opts.parseParallel, - ) - } - - is DataSourceType.InMemory -> when { - kType.isSubTypeOf() -> - (source as? 
InputStream)?.let { stream -> - runCatching { stream.reset() } - DataFrame.readTsv( - inputStream = stream, - delimiter = opts.delimiter, - header = opts.header, - charset = opts.charset, - colTypes = opts.colTypes, - skipLines = opts.skipLines, - readLines = opts.readLines, - parserOptions = opts.parserOptions, - ignoreEmptyLines = opts.ignoreEmptyLines, - allowMissingColumns = opts.allowMissingColumns, - ignoreExcessColumns = opts.ignoreExcessColumns, - quote = opts.quote, - ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, - trimInsideQuoted = opts.trimInsideQuoted, - parseParallel = opts.parseParallel, - ) - } - - kType.isSubTypeOf() -> - (source as? String)?.let { text -> - DataFrame.readTsvStr( - text = text, - delimiter = opts.delimiter, - header = opts.header, - colTypes = opts.colTypes, - skipLines = opts.skipLines, - readLines = opts.readLines, - parserOptions = opts.parserOptions, - ignoreEmptyLines = opts.ignoreEmptyLines, - allowMissingColumns = opts.allowMissingColumns, - ignoreExcessColumns = opts.ignoreExcessColumns, - quote = opts.quote, - ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, - trimInsideQuoted = opts.trimInsideQuoted, - parseParallel = opts.parseParallel, - ) - } - - else -> null - } + val kType = sourceInfo.kType + + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? 
File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return DataFrame.readTsv( + url = url, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + return when { + kType.isSubTypeOf() -> + (source as? InputStream)?.let { stream -> + DataFrame.readTsv( + inputStream = stream, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + kType.isSubTypeOf() -> + (source as? 
String)?.let { text -> + DataFrame.readTsvStr( + text = text, + delimiter = opts.delimiter, + header = opts.header, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + else -> null } } diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index 90caeec347..8830c0db55 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -81,10 +81,16 @@ public class ExcelNEW : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedReferenceTypes: Set = - setOf(typeOf(), typeOf(), typeOf(), typeOf()) - public val supportedInMemoryTypes: Set = - setOf(typeOf(), typeOf(), typeOf()) + public val supportedTypes: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) internal val EXTENSIONS: Set = setOf("xls", "xlsx") internal val MIME_TYPES: Set = setOf( @@ -99,15 +105,7 @@ public class ExcelNEW : DataFrameReadSource { if (ext != null && ext !in EXTENSIONS) return false val mime = sourceInfo.mimeType?.lowercase() if (mime != null && mime !in MIME_TYPES) return false - - val kType = sourceInfo.type.kType - return when (sourceInfo.type) { - is DataSourceType.Reference -> - supportedReferenceTypes.any { kType.isSubtypeOf(it) } - - is DataSourceType.InMemory -> - supportedInMemoryTypes.any { kType.isSubtypeOf(it) } - } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override 
fun readDataFrameOrNull( @@ -116,79 +114,75 @@ public class ExcelNEW : DataFrameReadSource { options: DataFrameReadOptions?, ): DataFrame<*>? { val opts = (options ?: Options()) as Options - val kType = sourceInfo.type.kType - return when (sourceInfo.type) { - is DataSourceType.Reference -> { - val url = when { - kType.isSubTypeOf() -> (source as? String)?.let(::asUrl) - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() - else -> null - } ?: return null - - DataFrame.readExcel( - url = url, - sheetName = opts.sheetName, - skipRows = opts.skipRows, - columns = opts.columns, - stringColumns = opts.stringColumns, - rowsCount = opts.rowsCount, - nameRepairStrategy = opts.nameRepairStrategy, - firstRowIsHeader = opts.firstRowIsHeader, - parseEmptyAsNull = opts.parseEmptyAsNull, - ) - } + val kType = sourceInfo.kType - is DataSourceType.InMemory -> when { - kType.isSubTypeOf() -> - (source as? InputStream)?.let { stream -> - runCatching { stream.reset() } - DataFrame.readExcel( - inputStream = stream, - sheetName = opts.sheetName, - skipRows = opts.skipRows, - columns = opts.columns, - stringColumns = opts.stringColumns, - rowsCount = opts.rowsCount, - nameRepairStrategy = opts.nameRepairStrategy, - firstRowIsHeader = opts.firstRowIsHeader, - parseEmptyAsNull = opts.parseEmptyAsNull, - ) - } + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? 
File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return DataFrame.readExcel( + url = url, + sheetName = opts.sheetName, + skipRows = opts.skipRows, + columns = opts.columns, + stringColumns = opts.stringColumns, + rowsCount = opts.rowsCount, + nameRepairStrategy = opts.nameRepairStrategy, + firstRowIsHeader = opts.firstRowIsHeader, + parseEmptyAsNull = opts.parseEmptyAsNull, + ) + } - kType.isSubTypeOf() -> - (source as? Workbook)?.let { wb -> - DataFrame.readExcel( - wb = wb, - sheetName = opts.sheetName, - skipRows = opts.skipRows, - columns = opts.columns, - formattingOptions = opts.stringColumns?.toFormattingOptions(), - rowsCount = opts.rowsCount, - nameRepairStrategy = opts.nameRepairStrategy, - firstRowIsHeader = opts.firstRowIsHeader, - parseEmptyAsNull = opts.parseEmptyAsNull, - ) - } + return when { + kType.isSubTypeOf() -> + (source as? InputStream)?.let { stream -> + DataFrame.readExcel( + inputStream = stream, + sheetName = opts.sheetName, + skipRows = opts.skipRows, + columns = opts.columns, + stringColumns = opts.stringColumns, + rowsCount = opts.rowsCount, + nameRepairStrategy = opts.nameRepairStrategy, + firstRowIsHeader = opts.firstRowIsHeader, + parseEmptyAsNull = opts.parseEmptyAsNull, + ) + } - kType.isSubTypeOf() -> - (source as? Sheet)?.let { sheet -> - // readExcel(Sheet) has no sheetName parameter — the sheet is already selected. - DataFrame.readExcel( - sheet = sheet, - columns = opts.columns, - formattingOptions = opts.stringColumns?.toFormattingOptions(), - skipRows = opts.skipRows, - rowsCount = opts.rowsCount, - nameRepairStrategy = opts.nameRepairStrategy, - firstRowIsHeader = opts.firstRowIsHeader, - parseEmptyAsNull = opts.parseEmptyAsNull, - ) - } + kType.isSubTypeOf() -> + (source as? 
Workbook)?.let { wb -> + DataFrame.readExcel( + wb = wb, + sheetName = opts.sheetName, + skipRows = opts.skipRows, + columns = opts.columns, + formattingOptions = opts.stringColumns?.toFormattingOptions(), + rowsCount = opts.rowsCount, + nameRepairStrategy = opts.nameRepairStrategy, + firstRowIsHeader = opts.firstRowIsHeader, + parseEmptyAsNull = opts.parseEmptyAsNull, + ) + } - else -> null - } + kType.isSubTypeOf() -> + (source as? Sheet)?.let { sheet -> + // readExcel(Sheet) has no sheetName parameter — the sheet is already selected. + DataFrame.readExcel( + sheet = sheet, + columns = opts.columns, + formattingOptions = opts.stringColumns?.toFormattingOptions(), + skipRows = opts.skipRows, + rowsCount = opts.rowsCount, + nameRepairStrategy = opts.nameRepairStrategy, + firstRowIsHeader = opts.firstRowIsHeader, + parseEmptyAsNull = opts.parseEmptyAsNull, + ) + } + + else -> null } } diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt index 0372c06d8d..5a8e1695c7 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt @@ -51,13 +51,10 @@ public class Jdbc : * carry that instruction. Provide it via [Options.sqlQueryOrTableName]. The only exception is [ResultSet], * which is already an executed query. * - * Supports the following sources: - * - [Reference][DataSourceType.Reference]: [DbConnectionConfig] - * - [InMemory][DataSourceType.InMemory]: [Connection], [DataSource], [DbConnectionConfig], [ResultSet] + * Supported source types: [Connection], [DataSource], [DbConnectionConfig], [ResultSet]. * - * Note: [DbConnectionConfig] is accepted as both reference and in-memory deliberately, to compare which - * feels more natural in practice. 
Other read-paths in this module — notably `readAllSqlTables` returning a - * `Map` — don't fit the single-DataFrame contract and are unchanged. + * `readAllSqlTables` returns a `Map` and doesn't fit the single-DataFrame contract; it + * remains as a direct API call. */ public class Jdbc2 : DataFrameReadSource { @@ -82,8 +79,7 @@ public class Jdbc2 : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedReferenceTypes: Set = setOf(typeOf()) - public val supportedInMemoryTypes: Set = + public val supportedTypes: Set = setOf( typeOf(), typeOf(), @@ -94,14 +90,7 @@ public class Jdbc2 : DataFrameReadSource { override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false - val kType = sourceInfo.type.kType - return when (sourceInfo.type) { - is DataSourceType.Reference -> - supportedReferenceTypes.any { kType.isSubtypeOf(it) } - - is DataSourceType.InMemory -> - supportedInMemoryTypes.any { kType.isSubtypeOf(it) } - } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 3cf874a5a7..3f8c731bef 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -46,25 +46,22 @@ public class Json : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedReferenceTypes: Set = - setOf(typeOf(), typeOf(), typeOf(), typeOf()) - public val supportedInMemoryTypes: Set = - setOf(typeOf(), typeOf(), typeOf()) + public val supportedTypes: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) } override fun acceptsSource(sourceInfo: DataSourceInfo, options: 
DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals("json") == false) return false if (sourceInfo.mimeType?.lowercase()?.equals("application/json") == false) return false - - val kType = sourceInfo.type.kType - return when (sourceInfo.type) { - is DataSourceType.Reference -> - supportedReferenceTypes.any { kType.isSubtypeOf(it) } - - is DataSourceType.InMemory -> - supportedInMemoryTypes.any { kType.isSubtypeOf(it) } - } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } @OptIn(ExperimentalSerializationApi::class) @@ -73,55 +70,45 @@ public class Json : DataFrameReadSource { sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, ): DataFrame<*>? { - val options = (options ?: Options()) as Options - val kType = sourceInfo.type.kType - return when (sourceInfo.type) { - is DataSourceType.Reference -> { - val url = when { - kType.isSubTypeOf() -> (source as? String)?.let(::asUrl) - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() - else -> null - } ?: return null - - DataFrame.readJson( - url = url, - header = options.header, - typeClashTactic = options.typeClashTactic, - keyValuePaths = options.keyValuePaths, - unifyNumbers = options.unifyNumbers, - ) - } - - is DataSourceType.InMemory -> { - val element = when { - kType.isSubTypeOf() -> - (source as? InputStream)?.let { - runCatching { it.reset() } - Json.decodeFromStream(it) - } + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? 
File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return DataFrame.readJson( + url = url, + header = opts.header, + typeClashTactic = opts.typeClashTactic, + keyValuePaths = opts.keyValuePaths, + unifyNumbers = opts.unifyNumbers, + ) + } - kType.isSubTypeOf() -> - (source as? String)?.let { - Json.decodeFromString(it) - } + val element: JsonElement? = when { + kType.isSubTypeOf() -> + (source as? InputStream)?.let { Json.decodeFromStream(it) } - kType.isSubTypeOf() -> - source as? JsonElement + kType.isSubTypeOf() -> + (source as? String)?.let { Json.decodeFromString(it) } - else -> null - } ?: return null + kType.isSubTypeOf() -> + source as? JsonElement - readJsonImpl( - parsed = element, - header = options.header, - typeClashTactic = options.typeClashTactic, - keyValuePaths = options.keyValuePaths, - unifyNumbers = options.unifyNumbers, - ) - } - } + else -> null + } ?: return null + + return readJsonImpl( + parsed = element, + header = opts.header, + typeClashTactic = opts.typeClashTactic, + keyValuePaths = opts.keyValuePaths, + unifyNumbers = opts.unifyNumbers, + ) } override val testOrder: Int = 10_000 From 6143eab3ee0b9555635192c0fcde3adaa31b651f Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 14 May 2026 15:47:24 +0200 Subject: [PATCH 05/20] json early exit --- .../org/jetbrains/kotlinx/dataframe/io/json.kt | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 3f8c731bef..679e728ce6 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -89,12 +89,15 @@ public class Json : DataFrameReadSource { ) } - val element: JsonElement? = when { + val element: JsonElement = when { kType.isSubTypeOf() -> (source as? 
InputStream)?.let { Json.decodeFromStream(it) } kType.isSubTypeOf() -> - (source as? String)?.let { Json.decodeFromString(it) } + (source as? String)?.let { + if (it.isNotJson()) return null + Json.decodeFromString(it) + } kType.isSubTypeOf() -> source as? JsonElement @@ -114,6 +117,16 @@ public class Json : DataFrameReadSource { override val testOrder: Int = 10_000 override fun toString(): String = "Json" + + // early-exit check for String to see if it's definitely not json + private fun String.isNotJson(): Boolean = + trim().let { + it.isEmpty() || + !( + it.startsWith('{') && it.endsWith('}') || + it.startsWith('[') && it.endsWith(']') + ) + } } private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) From 0c93a45ea0d9ce8e25dd2fdb0879c0a6bd33b746 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 14 May 2026 16:39:35 +0200 Subject: [PATCH 06/20] DataFrameSchema.readSource --- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 101 +++++++++++++++--- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 71 ++++++++++++ .../jetbrains/kotlinx/dataframe/io/Jdbc.kt | 36 +++++++ 3 files changed, 195 insertions(+), 13 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 3c7eff0c30..7725f1204e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -2,7 +2,8 @@ package org.jetbrains.kotlinx.dataframe.io import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.io.readSource +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import java.io.ByteArrayInputStream import java.io.File import java.io.FileNotFoundException @@ -26,6 +27,19 @@ public interface DataFrameReadSource { options: DataFrameReadOptions? 
= null, ): DataFrame<*>? + /** + * Read just the [DataFrameSchema] for [source]. + * + * The default implementation reads the full DataFrame and calls [DataFrame.schema]. Override when the + * source format can introspect types without materializing rows (e.g., JDBC metadata queries, Parquet/Arrow + * file footers, OpenAPI specs). + */ + public fun readDataFrameSchemaOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions? = null, + ): DataFrameSchema? = readDataFrameOrNull(source, sourceInfo, options)?.schema() + public fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean // `DataFrame.Companion.read` methods uses this to sort list of all supported formats in ascending order (-1, 2, 10) @@ -58,22 +72,36 @@ internal val newSupportedFormats: List by lazy { .sortedBy { it.testOrder } } -internal fun readDataFrameImpl( +/** + * Shared dispatch loop for [readDataFrameImpl] and [readDataFrameSchemaImpl]: handles String→URL + * normalization, InputStream buffering, sorted iteration, and error aggregation. The per-format read + * operation is supplied as [readOrNull]; [resultKind] is used only in the "unknown source" error message. + * + * @param [readOrNull] [DataFrameReadSource.readDataFrameOrNull] or [DataFrameReadSource.readDataFrameSchemaOrNull] + * Potentially, this could also return another type, like a GeoDataFrame. + */ +internal fun readSourceImpl( source: Any, sourceInfo: DataSourceInfo, - options: DataFrameReadOptions? 
= null, - formats: List = newSupportedFormats, -): AnyFrame { + options: DataFrameReadOptions?, + formats: List, + resultKind: String, + readOrNull: DataFrameReadSource.( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ) -> T?, +): T { if (source is String) { val url = asUrlOrNull(source) if (url != null) { - return readDataFrameImpl( + return readSourceImpl( source = url, - sourceInfo = sourceInfo.copy( - kType = typeOf(), - ), + sourceInfo = sourceInfo.copy(kType = typeOf()), options = options, formats = formats, + resultKind = resultKind, + readOrNull = readOrNull, ) } } @@ -95,15 +123,15 @@ internal fun readDataFrameImpl( formats.sortedBy { it.testOrder }.forEach { if (!it.acceptsSource(sourceInfo, options)) return@forEach try { - val df = it.readDataFrameOrNull(getSource(), sourceInfo, options) - if (df != null) return df + val result = it.readOrNull(getSource(), sourceInfo, options) + if (result != null) return result } catch (e: FileNotFoundException) { throw e } catch (e: Exception) { tries[it::class.simpleName!!] = e } } - throw IllegalArgumentException("Unknown DataFrame source $source, $sourceInfo; Tried $tries") + throw IllegalArgumentException("Unknown $resultKind source $source, $sourceInfo; Tried $tries") } /** @@ -119,7 +147,7 @@ internal fun readDataFrameImpl( * retired, this can be renamed to `read`. */ public fun DataFrame.Companion.readSource(source: Any, type: KType, options: DataFrameReadOptions? = null): AnyFrame = - readDataFrameImpl( + readSourceImpl( source = source, sourceInfo = DataSourceInfo( kType = type.withNullability(false), @@ -127,6 +155,9 @@ public fun DataFrame.Companion.readSource(source: Any, type: KType, options: Dat mimeType = null, // TODO, Apache Tika? 
), options = options, + formats = newSupportedFormats, + resultKind = "DataFrame", + readOrNull = DataFrameReadSource::readDataFrameOrNull, ) public inline fun DataFrame.Companion.readSource( @@ -134,6 +165,50 @@ public inline fun DataFrame.Companion.readSource( options: DataFrameReadOptions? = null, ): AnyFrame = readSource(source = source, type = typeOf(), options = options) +/** + * Schema-only counterpart of [DataFrame.Companion.readSource]: dispatches through every registered + * [DataFrameReadSource] and returns the resulting [DataFrameSchema] without materializing rows when the + * format supports it (e.g., JDBC). Formats with no fast schema path fall back to reading the full DataFrame + * and calling [DataFrame.schema]. + */ +public fun DataFrameSchema.Companion.readSource( + source: Any, + type: KType, + options: DataFrameReadOptions? = null, +): DataFrameSchema = + readSourceImpl( + source = source, + sourceInfo = DataSourceInfo( + kType = type.withNullability(false), + extension = source.extensionOrNull(), + mimeType = null, // TODO, Apache Tika? + ), + options = options, + formats = newSupportedFormats, + resultKind = "DataFrameSchema", + readOrNull = DataFrameReadSource::readDataFrameSchemaOrNull, + ) + +internal fun readDataFrameSchemaImpl( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions? = null, + formats: List = newSupportedFormats, +): DataFrameSchema = + readSourceImpl( + source = source, + sourceInfo = sourceInfo, + options = options, + formats = formats, + resultKind = "DataFrameSchema", + readOrNull = DataFrameReadSource::readDataFrameSchemaOrNull, + ) + +public inline fun DataFrameSchema.Companion.readSource( + source: R, + options: DataFrameReadOptions? = null, +): DataFrameSchema = readSource(source = source, type = typeOf(), options = options) + internal fun Any.extensionOrNull(): String? 
= when (this) { is Path -> extension diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index 086796316e..e83603a939 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -5,7 +5,9 @@ import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonElement import org.apache.poi.ss.usermodel.WorkbookFactory import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.io.db.H2 +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import org.junit.Test import java.io.File import java.sql.Connection @@ -297,4 +299,73 @@ class Guess2 { DataFrame.readSource(config, tableOpts) shouldBe expected DataFrame.readSource(config, Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer")) shouldBe expected } + + @Test + fun `read schema via default fallback (file-based formats)`() { + // JSON + val jsonExpected = DataFrame.readJson("../data/participants.json").schema() + DataFrameSchema.readSource( + File("../data/participants.json"), + ) shouldBe jsonExpected + DataFrameSchema.readSource( + "../data/participants.json", + ) shouldBe jsonExpected + + // CSV + val csvExpected = DataFrame.readCsv("../data/movies.csv").schema() + DataFrameSchema.readSource( + File("../data/movies.csv"), + ) shouldBe csvExpected + + // TSV + val tsvFile = File("src/test/resources/abc.tsv") + val tsvExpected = DataFrame.readTsv(tsvFile).schema() + DataFrameSchema.readSource(tsvFile) shouldBe tsvExpected + + // XLSX + val xlsxFile = File("src/test/resources/sample2.xlsx") + val xlsxExpected = DataFrame.readExcel(xlsxFile).schema() + DataFrameSchema.readSource(xlsxFile) shouldBe xlsxExpected + } + + @Test + fun `read JDBC schema via override`() { + val url = h2Url("guess2_schema") + 
DriverManager.getConnection(url).use { conn -> + seed(conn) + val expected = DataFrameSchema.readSqlTable(conn, "Customer") + val tableOpts = Jdbc2.Options(sqlQueryOrTableName = "Customer") + val queryOpts = Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer") + + DataFrameSchema.readSource(conn, tableOpts) shouldBe expected + DataFrameSchema.readSource(conn, queryOpts) shouldBe expected + + val config = DbConnectionConfig(url = url) + DataFrameSchema.readSource(config, tableOpts) shouldBe expected + } + } + + @Test + fun `read JDBC schema from ResultSet does not advance cursor`() { + val url = h2Url("guess2_rs_schema") + DriverManager.getConnection(url).use { conn -> + seed(conn) + + conn.prepareStatement("SELECT * FROM Customer").use { ps -> + ps.executeQuery().use { rs -> + // Schema-from-ResultSet uses JDBC metadata only — no rows are fetched, so the + // cursor stays at "before first row". (And nullability comes from the column metadata, + // which is conservatively nullable for columns without NOT NULL constraints; this is + // why we don't compare against the data-inferred schema directly.) 
+ val expected = DataFrameSchema.readResultSet( + conn.prepareStatement("SELECT * FROM Customer").executeQuery(), + H2(), + ) + val schema = DataFrameSchema.readSource(rs, Jdbc2.Options(dbType = H2())) + schema shouldBe expected + rs.isBeforeFirst shouldBe true + } + } + } + } } diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt index 5a8e1695c7..1d29bc328e 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt @@ -6,6 +6,8 @@ import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod import org.jetbrains.kotlinx.dataframe.codeGen.Code import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.io.db.DbType +import org.jetbrains.kotlinx.dataframe.io.db.extractDBTypeFromConnection +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import java.io.File import java.io.InputStream import java.nio.file.Path @@ -153,6 +155,40 @@ public class Jdbc2 : DataFrameReadSource { } } + override fun readDataFrameSchemaOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrameSchema? { + val opts = (options ?: Options()) as Options + return when (source) { + // ResultSet has a true zero-row metadata-only path. 
+ is ResultSet -> when { + opts.dbType != null -> + DataFrameSchema.readResultSet(source, opts.dbType) + + opts.resultSetConnection != null -> + DataFrameSchema.readResultSet(source, extractDBTypeFromConnection(opts.resultSetConnection)) + + else -> null + } + + is Connection -> opts.sqlQueryOrTableName?.let { + source.readDataFrameSchema(sqlQueryOrTableName = it, dbType = opts.dbType) + } + + is DataSource -> opts.sqlQueryOrTableName?.let { + source.readDataFrameSchema(sqlQueryOrTableName = it, dbType = opts.dbType) + } + + is DbConnectionConfig -> opts.sqlQueryOrTableName?.let { + source.readDataFrameSchema(sqlQueryOrTableName = it, dbType = opts.dbType) + } + + else -> null + } + } + override val testOrder: Int = 50_000 override fun toString(): String = "Jdbc" From 2283c93037f4445521350fe0aa4d2ed9ca36b1ff Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 18 May 2026 14:03:29 +0200 Subject: [PATCH 07/20] added Arrow support to DataFrameReadSource --- core/build.gradle.kts | 2 + .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 64 +++++ .../kotlinx/dataframe/io/arrowReading.kt | 244 ++++++++++++++++++ ...s.kotlinx.dataframe.io.DataFrameReadSource | 3 + .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 4 +- .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 4 +- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 5 +- .../jetbrains/kotlinx/dataframe/io/Jdbc.kt | 4 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 4 +- 9 files changed, 323 insertions(+), 11 deletions(-) create mode 100644 dataframe-arrow/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource diff --git a/core/build.gradle.kts b/core/build.gradle.kts index a6a2fc3115..962ac51863 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -313,6 +313,8 @@ tasks.withType { tasks.test { maxHeapSize = "1g" + // Arrow's off-heap allocator needs deep reflection into java.nio. 
+ jvmArgs("--add-opens", "java.base/java.nio=ALL-UNNAMED") } // Test task for Java 16+ language-specific tests diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index e83603a939..70534d7771 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -368,4 +368,68 @@ class Guess2 { } } } + + @Test + fun `read Arrow Feather reference`() { + val featherFile = File("src/test/resources/test.feather") + val expected = DataFrame.readArrowFeather(featherFile) + + DataFrame.readSource(featherFile.path) shouldBe expected + DataFrame.readSource(Path(featherFile.path)) shouldBe expected + DataFrame.readSource(featherFile) shouldBe expected + DataFrame.readSource( + Path(featherFile.path).absolute().normalize().toUri().toURL(), + ) shouldBe expected + + val options = ArrowFeatherNEW.Options() + + DataFrame.readSource(featherFile.path, options) shouldBe expected + DataFrame.readSource(featherFile, options) shouldBe expected + } + + @Test + fun `read Arrow Feather in memory`() { + val featherFile = File("src/test/resources/test.feather") + val expected = DataFrame.readArrowFeather(featherFile) + val options = ArrowFeatherNEW.Options() + + // ByteArray, InputStream, SeekableByteChannel all need options to disambiguate (no extension). 
+ DataFrame.readSource(featherFile.readBytes(), options) shouldBe expected + DataFrame.readSource(featherFile.inputStream(), options) shouldBe expected + java.nio.file.Files.newByteChannel(featherFile.toPath()).use { channel -> + DataFrame.readSource(channel, options) shouldBe expected + } + } + + @Test + fun `read Arrow IPC reference`() { + val ipcFile = File("src/test/resources/test.arrow") + val expected = DataFrame.readArrowIPC(ipcFile) + + DataFrame.readSource(ipcFile.path) shouldBe expected + DataFrame.readSource(Path(ipcFile.path)) shouldBe expected + DataFrame.readSource(ipcFile) shouldBe expected + DataFrame.readSource( + Path(ipcFile.path).absolute().normalize().toUri().toURL(), + ) shouldBe expected + + val options = ArrowIPC.Options() + DataFrame.readSource(ipcFile, options) shouldBe expected + } + + @Test + fun `read Parquet reference`() { + val parquetFile = File("src/test/resources/test.parquet") + val expected = DataFrame.readParquet(parquetFile) + + DataFrame.readSource(parquetFile.path) shouldBe expected + DataFrame.readSource(Path(parquetFile.path)) shouldBe expected + DataFrame.readSource(parquetFile) shouldBe expected + DataFrame.readSource( + Path(parquetFile.path).absolute().normalize().toUri().toURL(), + ) shouldBe expected + + val options = Parquet.Options() + DataFrame.readSource(parquetFile, options) shouldBe expected + } } diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt index 5cf884d73d..63161add45 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt @@ -18,6 +18,9 @@ import java.nio.channels.ReadableByteChannel import java.nio.channels.SeekableByteChannel import java.nio.file.Files import java.nio.file.Path +import kotlin.reflect.KType +import kotlin.reflect.full.isSubtypeOf 
+import kotlin.reflect.typeOf public class ArrowFeather : SupportedDataFrameFormat { override fun readDataFrame(stream: InputStream, header: List): AnyFrame = @@ -36,6 +39,247 @@ public class ArrowFeather : SupportedDataFrameFormat { DefaultReadArrowMethod(pathRepresentation) } +/** + * [DataFrameReadSource] for [Arrow Feather files][DataFrame.readArrowFeather] (random-access IPC format). + * + * Supported source types: + * - References: [URL], [Path], [File] + * - In-memory: [SeekableByteChannel], [ByteArray], [InputStream], [ArrowReader] + * + * Default-accepts the `.feather` extension. To read with no extension hint (e.g., an [InputStream]) pass + * an [Options] instance to disambiguate from text formats. + */ +public class ArrowFeatherNEW : DataFrameReadSource { + + public data class Options(val nullability: NullabilityOptions = NullabilityOptions.Infer) : DataFrameReadOptions + + public companion object { + public val SUPPORTED_TYPES: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) + + internal const val EXTENSION: String = "feather" + } + + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { + if (options != null && options !is Options) return false + if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false + return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + } + + override fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrame<*>? { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + // ArrowReader is exclusive; check before more general types. + if (kType.isArrowSubTypeOf()) { + return (source as? ArrowReader)?.let { DataFrame.readArrow(it, opts.nullability) } + } + + val url: URL? = when { + kType.isArrowSubTypeOf() -> source as? URL + kType.isArrowSubTypeOf() -> (source as? 
Path)?.toUri()?.toURL() + kType.isArrowSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return DataFrame.readArrowFeather(url, opts.nullability) + } + + return when { + kType.isArrowSubTypeOf() -> + (source as? SeekableByteChannel)?.let { + DataFrame.readArrowFeather(it, nullability = opts.nullability) + } + + kType.isArrowSubTypeOf() -> + (source as? ByteArray)?.let { DataFrame.readArrowFeather(it, opts.nullability) } + + kType.isArrowSubTypeOf() -> + (source as? InputStream)?.let { DataFrame.readArrowFeather(it, opts.nullability) } + + else -> null + } + } + + override val testOrder: Int = 60_000 + + override fun toString(): String = "ArrowFeather" +} + +/** + * [DataFrameReadSource] for [Arrow IPC streaming files][DataFrame.readArrowIPC]. + * + * Supported source types: + * - References: [URL], [Path], [File] + * - In-memory: [InputStream], [ByteArray], [ReadableByteChannel], [ArrowReader] + * + * There's no widely-standardized extension for IPC streaming files (`.arrow` is most common but is also + * used for random-access Feather), so this format accepts the `.arrow` extension. If your `.arrow` file is + * actually random-access (Feather), prefer [ArrowFeatherNEW] — both formats will match `.arrow`, but + * [ArrowFeatherNEW] runs first by [testOrder] and a Feather read of a streaming-format file will throw, + * letting the framework fall through to [ArrowIPC]. 
+ */ +public class ArrowIPC : DataFrameReadSource { + + public data class Options( + val allocator: RootAllocator = Allocator.ROOT, + val nullability: NullabilityOptions = NullabilityOptions.Infer, + ) : DataFrameReadOptions + + public companion object { + public val SUPPORTED_TYPES: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) + + internal const val EXTENSION: String = "arrow" + } + + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { + if (options != null && options !is Options) return false + if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false + return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + } + + override fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrame<*>? { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + if (kType.isArrowSubTypeOf()) { + return (source as? ArrowReader)?.let { DataFrame.readArrow(it, opts.nullability) } + } + + val url: URL? = when { + kType.isArrowSubTypeOf() -> source as? URL + kType.isArrowSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isArrowSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return DataFrame.readArrowIPC(url, opts.nullability) + } + + return when { + kType.isArrowSubTypeOf() -> + (source as? ReadableByteChannel)?.let { + DataFrame.readArrowIPC(it, allocator = opts.allocator, nullability = opts.nullability) + } + + kType.isArrowSubTypeOf() -> + (source as? ByteArray)?.let { DataFrame.readArrowIPC(it, opts.nullability) } + + kType.isArrowSubTypeOf() -> + (source as? InputStream)?.let { DataFrame.readArrowIPC(it, opts.nullability) } + + else -> null + } + } + + // Runs after ArrowFeatherNEW so that `.feather` files get the random-access reader first. 
+ // Both accept `.arrow`; if Feather reading throws on an IPC streaming file the framework falls + // through to here. + override val testOrder: Int = 60_100 + + override fun toString(): String = "ArrowIPC" +} + +/** + * [DataFrameReadSource] for Apache Parquet files (read via Arrow Dataset). + * + * Arrow Dataset only consumes URIs, so only reference-style sources are supported: + * - References: [URL], [Path], [File] + * + * TODO? Multi-file Parquet datasets (vararg in [DataFrame.readParquet]) aren't covered by this single-source API; + * use [DataFrame.readParquet] directly for those. + */ +public class Parquet : DataFrameReadSource { + + public data class Options( + val nullability: NullabilityOptions = NullabilityOptions.Infer, + val batchSize: Long = ARROW_PARQUET_DEFAULT_BATCH_SIZE, + ) : DataFrameReadOptions + + public companion object { + public val SUPPORTED_TYPES: Set = + setOf(typeOf(), typeOf(), typeOf()) + + internal const val EXTENSION: String = "parquet" + } + + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { + if (options != null && options !is Options) return false + if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false + return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + } + + override fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrame<*>? { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + return when { + kType.isArrowSubTypeOf() -> + (source as? URL)?.let { + DataFrame.readParquet( + it, + nullability = opts.nullability, + batchSize = opts.batchSize, + ) + } + + kType.isArrowSubTypeOf() -> + (source as? Path)?.let { + DataFrame.readParquet( + it, + nullability = opts.nullability, + batchSize = opts.batchSize, + ) + } + + kType.isArrowSubTypeOf() -> + (source as? 
File)?.let { + DataFrame.readParquet( + it, + nullability = opts.nullability, + batchSize = opts.batchSize, + ) + } + + else -> null + } + } + + override val testOrder: Int = 60_500 + + override fun toString(): String = "Parquet" +} + +private inline fun KType.isArrowSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) + private const val READ_ARROW_FEATHER = "readArrowFeather" internal const val ARROW_PARQUET_DEFAULT_BATCH_SIZE = 32768L diff --git a/dataframe-arrow/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource b/dataframe-arrow/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource new file mode 100644 index 0000000000..ef8466f22f --- /dev/null +++ b/dataframe-arrow/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource @@ -0,0 +1,3 @@ +org.jetbrains.kotlinx.dataframe.io.ArrowIPC +org.jetbrains.kotlinx.dataframe.io.ArrowFeatherNEW +org.jetbrains.kotlinx.dataframe.io.Parquet diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index b168062a49..cba2897fcb 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -58,7 +58,7 @@ public class Csv : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedTypes: Set = + public val SUPPORTED_TYPES: Set = setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) internal const val EXTENSION: String = "csv" @@ -69,7 +69,7 @@ public class Csv : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return 
SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index fa605e43a0..fdf80ada21 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -58,7 +58,7 @@ public class Tsv : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedTypes: Set = + public val SUPPORTED_TYPES: Set = setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) internal const val EXTENSION: String = "tsv" @@ -69,7 +69,7 @@ public class Tsv : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index 8830c0db55..56119ee7e4 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -81,12 +81,11 @@ public class ExcelNEW : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedTypes: Set = + public val SUPPORTED_TYPES: Set = setOf( typeOf(), typeOf(), typeOf(), - typeOf(), typeOf(), typeOf(), typeOf(), @@ -105,7 +104,7 @@ public class ExcelNEW : DataFrameReadSource { if (ext != null && ext !in EXTENSIONS) return false val mime = sourceInfo.mimeType?.lowercase() if (mime != null && mime !in 
MIME_TYPES) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt index 1d29bc328e..78948a5514 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt @@ -81,7 +81,7 @@ public class Jdbc2 : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedTypes: Set = + public val SUPPORTED_TYPES: Set = setOf( typeOf(), typeOf(), @@ -92,7 +92,7 @@ public class Jdbc2 : DataFrameReadSource { override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 679e728ce6..1d24f32abc 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -46,7 +46,7 @@ public class Json : DataFrameReadSource { ) : DataFrameReadOptions public companion object { - public val supportedTypes: Set = + public val SUPPORTED_TYPES: Set = setOf( typeOf(), typeOf(), @@ -61,7 +61,7 @@ public class Json : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals("json") == false) return false if (sourceInfo.mimeType?.lowercase()?.equals("application/json") == false) return false - 
return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } } @OptIn(ExperimentalSerializationApi::class) From 36b722b82fa18bad19428c5d75e89b453b6e57ac Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 18 May 2026 14:15:39 +0200 Subject: [PATCH 08/20] moved supportedType to `DataFrameReadSource` so we could use it later in converters/parsers --- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 11 +++ .../kotlinx/dataframe/io/arrowReading.kt | 92 +++++++++---------- .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 8 +- .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 8 +- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 24 ++--- .../jetbrains/kotlinx/dataframe/io/Jdbc.kt | 18 ++-- .../jetbrains/kotlinx/dataframe/io/json.kt | 22 ++--- 7 files changed, 96 insertions(+), 87 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 7725f1204e..7e1c5ff818 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -21,6 +21,17 @@ import kotlin.reflect.typeOf public interface DataFrameReadOptions public interface DataFrameReadSource { + /** + * The set of source [KType]s this format knows how to read. The framework uses this in the default + * [acceptsSource] implementation, and overriding `acceptsSource` implementations should still consult it + * so that adding a new supported type only requires updating this set. + * + * Note: a `String` *reference* (path/URL) is normalized to a [URL] by `readSourceImpl` before any format + * is invoked, so only include `String` here when raw text content is a legitimate input (e.g., JSON/CSV + * text). For binary formats, leave `String` out. 
+ */ + public val supportedTypes: Set + public fun readDataFrameOrNull( source: Any, sourceInfo: DataSourceInfo, diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt index 63161add45..be208cb93e 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt @@ -53,25 +53,25 @@ public class ArrowFeatherNEW : DataFrameReadSource { public data class Options(val nullability: NullabilityOptions = NullabilityOptions.Infer) : DataFrameReadOptions - public companion object { - public val SUPPORTED_TYPES: Set = - setOf( - typeOf(), - typeOf(), - typeOf(), - typeOf(), - typeOf(), - typeOf(), - typeOf(), - ) + override val supportedTypes: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) + public companion object { internal const val EXTENSION: String = "feather" } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false - return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( @@ -83,14 +83,14 @@ public class ArrowFeatherNEW : DataFrameReadSource { val kType = sourceInfo.kType // ArrowReader is exclusive; check before more general types. - if (kType.isArrowSubTypeOf()) { + if (kType.isSubTypeOf()) { return (source as? ArrowReader)?.let { DataFrame.readArrow(it, opts.nullability) } } val url: URL? = when { - kType.isArrowSubTypeOf() -> source as? URL - kType.isArrowSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isArrowSubTypeOf() -> (source as? 
File)?.toPath()?.toUri()?.toURL() + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() else -> null } if (url != null) { @@ -98,15 +98,15 @@ public class ArrowFeatherNEW : DataFrameReadSource { } return when { - kType.isArrowSubTypeOf() -> + kType.isSubTypeOf() -> (source as? SeekableByteChannel)?.let { DataFrame.readArrowFeather(it, nullability = opts.nullability) } - kType.isArrowSubTypeOf() -> + kType.isSubTypeOf() -> (source as? ByteArray)?.let { DataFrame.readArrowFeather(it, opts.nullability) } - kType.isArrowSubTypeOf() -> + kType.isSubTypeOf() -> (source as? InputStream)?.let { DataFrame.readArrowFeather(it, opts.nullability) } else -> null @@ -138,25 +138,25 @@ public class ArrowIPC : DataFrameReadSource { val nullability: NullabilityOptions = NullabilityOptions.Infer, ) : DataFrameReadOptions - public companion object { - public val SUPPORTED_TYPES: Set = - setOf( - typeOf(), - typeOf(), - typeOf(), - typeOf(), - typeOf(), - typeOf(), - typeOf(), - ) + override val supportedTypes: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) + public companion object { internal const val EXTENSION: String = "arrow" } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false - return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( @@ -167,14 +167,14 @@ public class ArrowIPC : DataFrameReadSource { val opts = (options ?: Options()) as Options val kType = sourceInfo.kType - if (kType.isArrowSubTypeOf()) { + if (kType.isSubTypeOf()) { return (source as? 
ArrowReader)?.let { DataFrame.readArrow(it, opts.nullability) } } val url: URL? = when { - kType.isArrowSubTypeOf() -> source as? URL - kType.isArrowSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isArrowSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() else -> null } if (url != null) { @@ -182,15 +182,15 @@ public class ArrowIPC : DataFrameReadSource { } return when { - kType.isArrowSubTypeOf() -> + kType.isSubTypeOf() -> (source as? ReadableByteChannel)?.let { DataFrame.readArrowIPC(it, allocator = opts.allocator, nullability = opts.nullability) } - kType.isArrowSubTypeOf() -> + kType.isSubTypeOf() -> (source as? ByteArray)?.let { DataFrame.readArrowIPC(it, opts.nullability) } - kType.isArrowSubTypeOf() -> + kType.isSubTypeOf() -> (source as? InputStream)?.let { DataFrame.readArrowIPC(it, opts.nullability) } else -> null @@ -221,17 +221,17 @@ public class Parquet : DataFrameReadSource { val batchSize: Long = ARROW_PARQUET_DEFAULT_BATCH_SIZE, ) : DataFrameReadOptions - public companion object { - public val SUPPORTED_TYPES: Set = - setOf(typeOf(), typeOf(), typeOf()) + override val supportedTypes: Set = + setOf(typeOf(), typeOf(), typeOf()) + public companion object { internal const val EXTENSION: String = "parquet" } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false - return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( @@ -242,7 +242,7 @@ public class Parquet : DataFrameReadSource { val opts = (options ?: Options()) as Options val kType = sourceInfo.kType return when { - 
kType.isArrowSubTypeOf() -> + kType.isSubTypeOf() -> (source as? URL)?.let { DataFrame.readParquet( it, @@ -251,7 +251,7 @@ public class Parquet : DataFrameReadSource { ) } - kType.isArrowSubTypeOf() -> + kType.isSubTypeOf() -> (source as? Path)?.let { DataFrame.readParquet( it, @@ -260,7 +260,7 @@ public class Parquet : DataFrameReadSource { ) } - kType.isArrowSubTypeOf() -> + kType.isSubTypeOf() -> (source as? File)?.let { DataFrame.readParquet( it, @@ -278,7 +278,7 @@ public class Parquet : DataFrameReadSource { override fun toString(): String = "Parquet" } -private inline fun KType.isArrowSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) +private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) private const val READ_ARROW_FEATHER = "readArrowFeather" diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index cba2897fcb..e9fd8c71f0 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -57,10 +57,10 @@ public class Csv : DataFrameReadSource { val parseParallel: Boolean = DelimParams.PARSE_PARALLEL, ) : DataFrameReadOptions - public companion object { - public val SUPPORTED_TYPES: Set = - setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) + override val supportedTypes: Set = + setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) + public companion object { internal const val EXTENSION: String = "csv" internal const val MIME_TYPE: String = "text/csv" } @@ -69,7 +69,7 @@ public class Csv : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false - return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedTypes.any { 
sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index fdf80ada21..48ae4aca50 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -57,10 +57,10 @@ public class Tsv : DataFrameReadSource { val parseParallel: Boolean = DelimParams.PARSE_PARALLEL, ) : DataFrameReadOptions - public companion object { - public val SUPPORTED_TYPES: Set = - setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) + override val supportedTypes: Set = + setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) + public companion object { internal const val EXTENSION: String = "tsv" internal const val MIME_TYPE: String = "text/tab-separated-values" } @@ -69,7 +69,7 @@ public class Tsv : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false - return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index 56119ee7e4..dcb683319d 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -80,17 +80,19 @@ public class ExcelNEW : DataFrameReadSource { val parseEmptyAsNull: Boolean = true, ) : DataFrameReadOptions - public companion object { - public val SUPPORTED_TYPES: Set = - setOf( - typeOf(), - typeOf(), - typeOf(), - typeOf(), - typeOf(), - 
typeOf(), - ) + // String reference paths are normalized to URL by readSourceImpl, so no String entry here; + // Excel is binary, so raw String content isn't a meaningful input either. + override val supportedTypes: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) + public companion object { internal val EXTENSIONS: Set = setOf("xls", "xlsx") internal val MIME_TYPES: Set = setOf( "application/vnd.ms-excel", @@ -104,7 +106,7 @@ public class ExcelNEW : DataFrameReadSource { if (ext != null && ext !in EXTENSIONS) return false val mime = sourceInfo.mimeType?.lowercase() if (mime != null && mime !in MIME_TYPES) return false - return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt index 78948a5514..b78be745f1 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt @@ -80,19 +80,17 @@ public class Jdbc2 : DataFrameReadSource { val resultSetConnection: Connection? 
= null, ) : DataFrameReadOptions - public companion object { - public val SUPPORTED_TYPES: Set = - setOf( - typeOf(), - typeOf(), - typeOf(), - typeOf(), - ) - } + override val supportedTypes: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false - return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } override fun readDataFrameOrNull( diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 1d24f32abc..431fdbe427 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -45,23 +45,21 @@ public class Json : DataFrameReadSource { val unifyNumbers: Boolean = true, ) : DataFrameReadOptions - public companion object { - public val SUPPORTED_TYPES: Set = - setOf( - typeOf(), - typeOf(), - typeOf(), - typeOf(), - typeOf(), - typeOf(), - ) - } + override val supportedTypes: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + typeOf(), + ) override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals("json") == false) return false if (sourceInfo.mimeType?.lowercase()?.equals("application/json") == false) return false - return SUPPORTED_TYPES.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } @OptIn(ExperimentalSerializationApi::class) From a8ce71221b4a7d6468c7aaf3e135feb97de291a6 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 18 May 2026 15:04:23 +0200 Subject: [PATCH 09/20] 
DataFrameReadSource openapi support --- core/build.gradle.kts | 1 + .../kotlinx/dataframe/api/generateCode.kt | 2 + .../jetbrains/kotlinx/dataframe/io/guess2.kt | 50 ++++++- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 45 +++++++ .../jetbrains/kotlinx/dataframe/io/OpenApi.kt | 127 ++++++++++++++++++ 5 files changed, 224 insertions(+), 1 deletion(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 962ac51863..410332d914 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -90,6 +90,7 @@ dependencies { // testImplementation(projects.dataframeGeo) testImplementation(projects.dataframeJdbc) testImplementation(libs.h2db) + testImplementation(projects.dataframeOpenapiGenerator) } // Configure testJava16 dependencies to extend from test diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/generateCode.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/generateCode.kt index 5137cf200c..4ad31062af 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/generateCode.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/generateCode.kt @@ -297,6 +297,8 @@ public val NameNormalizer.Companion.default: NameNormalizer get() = NameNormaliz @RequiredByIntellijPlugin public value class CodeString(public val value: String) { override fun toString(): String = value + + public companion object } @PublishedApi diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 7e1c5ff818..a86a0a2d88 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -2,6 +2,8 @@ package org.jetbrains.kotlinx.dataframe.io import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.CodeString +import org.jetbrains.kotlinx.dataframe.api.generateInterfaces 
import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import java.io.ByteArrayInputStream @@ -12,7 +14,6 @@ import java.net.URI import java.net.URL import java.nio.file.Path import java.util.ServiceLoader -import kotlin.io.extension import kotlin.io.path.extension import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -51,6 +52,15 @@ public interface DataFrameReadSource { options: DataFrameReadOptions? = null, ): DataFrameSchema? = readDataFrameOrNull(source, sourceInfo, options)?.schema() + public fun readDataSchemaCodeOrNull( + source: Any, + sourceInfo: DataSourceInfo, + name: String, + options: DataFrameReadOptions? = null, + ): CodeString? = + readDataFrameSchemaOrNull(source, sourceInfo, options) + ?.generateInterfaces(name) + public fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean // `DataFrame.Companion.read` methods uses this to sort list of all supported formats in ascending order (-1, 2, 10) @@ -220,6 +230,44 @@ public inline fun DataFrameSchema.Companion.readSource( options: DataFrameReadOptions? = null, ): DataFrameSchema = readSource(source = source, type = typeOf(), options = options) +/** + * Code-generation counterpart of [DataFrame.Companion.readSource]: dispatches through every registered + * [DataFrameReadSource] and returns a [CodeString] containing the generated `@DataSchema` interface + * declarations (plus enums/typealiases for formats like OpenAPI). The [name] is the marker name used for + * the top-level generated interface. + * + * The default implementation in [DataFrameReadSource.readDataSchemaCodeOrNull] runs + * [DataFrameSchema.generateInterfaces] on the format's [DataFrameReadSource.readDataFrameSchemaOrNull] + * result; formats that produce richer code (OpenAPI markers, enums, typealiases) override the method + * directly. 
+ */ +public fun CodeString.Companion.readSource( + source: Any, + type: KType, + name: String, + options: DataFrameReadOptions? = null, +): CodeString = + readSourceImpl( + source = source, + sourceInfo = DataSourceInfo( + kType = type.withNullability(false), + extension = source.extensionOrNull(), + mimeType = null, // TODO, Apache Tika? + ), + options = options, + formats = newSupportedFormats, + resultKind = "CodeString", + readOrNull = { src, info, opts -> + readDataSchemaCodeOrNull(src, info, name, opts) + }, + ) + +public inline fun CodeString.Companion.readSource( + source: R, + name: String, + options: DataFrameReadOptions? = null, +): CodeString = readSource(source = source, type = typeOf(), name = name, options = options) + internal fun Any.extensionOrNull(): String? = when (this) { is Path -> extension diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index 70534d7771..fe479194d9 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -1,10 +1,12 @@ package org.jetbrains.kotlinx.dataframe.io import io.kotest.matchers.shouldBe +import io.kotest.matchers.string.shouldContain import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonElement import org.apache.poi.ss.usermodel.WorkbookFactory import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.CodeString import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.io.db.H2 import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema @@ -432,4 +434,47 @@ class Guess2 { val options = Parquet.Options() DataFrame.readSource(parquetFile, options) shouldBe expected } + + @Test + fun `read OpenAPI yaml as code`() { + val openApiFile = File("src/test/resources/petstore.yaml") + + // The reference call from the existing helper, used as 
the ground truth. + val expected = readOpenApiAsString( + openApiAsString = openApiFile.readText(), + name = "Petstore", + extensionProperties = false, + generateHelperCompanionObject = false, + ) + + // String path / File / Path / URL all route through readSourceImpl to OpenApi2. + CodeString.readSource(openApiFile.path, name = "Petstore").value shouldBe expected + CodeString.readSource(openApiFile, name = "Petstore").value shouldBe expected + CodeString.readSource(Path(openApiFile.path), name = "Petstore").value shouldBe expected + CodeString.readSource( + Path(openApiFile.path).absolute().normalize().toUri().toURL(), + name = "Petstore", + ).value shouldBe expected + + // String content path (raw spec text) also works. + CodeString.readSource(openApiFile.readText(), name = "Petstore").value shouldBe expected + } + + @Test + fun `OpenAPI does not steal plain JSON DataFrame reads`() { + // A regular JSON file (not an OpenAPI spec) still goes to Json, even though OpenApi2 runs first. + // OpenApi2.readDataSchemaCodeOrNull returns null for non-OpenAPI content, but more importantly + // OpenApi2.readDataFrameOrNull is overridden to always return null, so DataFrame reads fall through. + val expected = DataFrame.readJson("../data/participants.json") + DataFrame.readSource(File("../data/participants.json")) shouldBe expected + } + + @Test + fun `default DataSchema code generation works for JSON via interface default`() { + // The interface default reads the schema and calls generateInterfaces — exercise it on a JSON file. + val jsonFile = File("../data/participants.json") + val schemaCode = CodeString.readSource(jsonFile, name = "Participants") + // The output is non-empty and includes the marker name.
+ schemaCode.value shouldContain "Participants" + } } diff --git a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt index d3650fa791..012caa049c 100644 --- a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt +++ b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt @@ -1,10 +1,137 @@ package org.jetbrains.kotlinx.dataframe.io +import io.swagger.v3.parser.core.models.AuthorizationValue +import io.swagger.v3.parser.core.models.ParseOptions +import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.CodeString import org.jetbrains.kotlinx.dataframe.codeGen.Code import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod +import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import java.io.File import java.io.InputStream +import java.net.URL +import java.nio.file.Path +import kotlin.io.path.readText +import kotlin.reflect.KType +import kotlin.reflect.full.isSubtypeOf +import kotlin.reflect.typeOf + +/** + * [DataFrameReadSource] for OpenAPI specifications. + * + * OpenAPI doesn't produce a `DataFrame` or a single `DataFrameSchema` — its output is a multi-marker code + * blob (interfaces + enums + typealiases). Only [readDataSchemaCodeOrNull] produces a result; the DataFrame + * and Schema methods are explicitly overridden to return `null`, so calling + * `DataFrame.readSource(openapiFile)` falls through to JSON, while `CodeString.readSource(openapiFile, name)` + * dispatches here.
+ * + * `.yaml`/`.yml` files are unambiguously OpenAPI; `.json` files are disambiguated at read time by + * [isOpenApiStr] returning null early when the JSON isn't actually an OpenAPI spec, letting the framework + * fall through to the JSON format for plain data. + */ +public class OpenApi2 : DataFrameReadSource { + + public data class Options( + val auth: List? = null, + val parseOptions: ParseOptions? = null, + val extensionProperties: Boolean = false, + val generateHelperCompanionObject: Boolean = false, + val visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC, + ) : DataFrameReadOptions + + override val supportedTypes: Set = + setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) + + public companion object { + internal val EXTENSIONS: Set = setOf("yaml", "yml", "json") + } + + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { + if (options != null && options !is Options) return false + val ext = sourceInfo.extension?.lowercase() + if (ext != null && ext !in EXTENSIONS) return false + return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + } + + // OpenAPI doesn't produce a DataFrame. + override fun readDataFrameOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrame<*>? = null + + // ...nor a single DataFrameSchema, it can produce enums, typealiases, etc. + // so it only supports readDataSchemaCodeOrNull() + override fun readDataFrameSchemaOrNull( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ): DataFrameSchema? = null + + override fun readDataSchemaCodeOrNull( + source: Any, + sourceInfo: DataSourceInfo, + name: String, + options: DataFrameReadOptions?, + ): CodeString? { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + // Resolve to OpenAPI-spec text, returning null if the content isn't OpenAPI. 
+ val text: String = when { + kType.isSubtypeOf(typeOf()) -> { + val url = (source as? URL) ?: return null + if (!isOpenApi(url)) return null + url.readText() + } + + kType.isSubtypeOf(typeOf()) -> { + val path = (source as? Path) ?: return null + if (!isOpenApi(path)) return null + path.readText() + } + + kType.isSubtypeOf(typeOf()) -> { + val file = (source as? File) ?: return null + if (!isOpenApi(file.toPath())) return null + file.readText() + } + + kType.isSubtypeOf(typeOf()) -> { + val text = (source as? String) ?: return null + if (!isOpenApiStr(text)) return null + text + } + + kType.isSubtypeOf(typeOf()) -> { + val text = (source as? InputStream)?.bufferedReader()?.readText() ?: return null + if (!isOpenApiStr(text)) return null + text + } + + else -> return null + } + + return CodeString( + readOpenApiAsString( + openApiAsString = text, + name = name, + auth = opts.auth, + options = opts.parseOptions, + extensionProperties = opts.extensionProperties, + generateHelperCompanionObject = opts.generateHelperCompanionObject, + visibility = opts.visibility, + ), + ) + } + + // Run before Json (10_000) so .json files get the OpenAPI content check first. + override val testOrder: Int = 9_000 + + override fun toString(): String = "OpenApi" +} /** * Allows for OpenApi type schemas to be converted to [DataSchema] interfaces. 
From b3aa890b522bf78392da280fb09ea3597239bf3d Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 18 May 2026 20:51:00 +0200 Subject: [PATCH 10/20] DataRow.readSource function --- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 24 ++++++++ .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 60 +++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index a86a0a2d88..141f950e29 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -1,10 +1,13 @@ package org.jetbrains.kotlinx.dataframe.io import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.CodeString import org.jetbrains.kotlinx.dataframe.api.generateInterfaces import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.api.single import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import java.io.ByteArrayInputStream import java.io.File @@ -181,6 +184,27 @@ public fun DataFrame.Companion.readSource(source: Any, type: KType, options: Dat readOrNull = DataFrameReadSource::readDataFrameOrNull, ) +public inline fun DataRow.Companion.readSource( + source: R, + options: DataFrameReadOptions? = null, +): AnyRow = readSource(source = source, type = typeOf(), options = options) + +public fun DataRow.Companion.readSource(source: Any, type: KType, options: DataFrameReadOptions? = null): AnyRow = + readSourceImpl( + source = source, + sourceInfo = DataSourceInfo( + kType = type.withNullability(false), + extension = source.extensionOrNull(), + mimeType = null, // TODO, Apache Tika? 
+ ), + options = options, + formats = newSupportedFormats, + resultKind = "DataRow", + readOrNull = { source, sourceInfo, options -> + readDataFrameOrNull(source, sourceInfo, options)?.single() + }, + ) + public inline fun DataFrame.Companion.readSource( source: R, options: DataFrameReadOptions? = null, diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index fe479194d9..b3890ae66c 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -6,8 +6,10 @@ import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonElement import org.apache.poi.ss.usermodel.WorkbookFactory import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.CodeString import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.api.single import org.jetbrains.kotlinx.dataframe.io.db.H2 import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import org.junit.Test @@ -477,4 +479,62 @@ class Guess2 { // The output is non-empty and includes the marker name. schemaCode.value shouldContain "Participants" } + + // region DataRow.readSource — single-row inputs across formats + + @Test + fun `read DataRow from CSV string`() { + val csvText = "a,b,c\n1,2,3" + val expected = DataFrame.readCsvStr(csvText).single() + DataRow.readSource(csvText, Csv.Options()) shouldBe expected + } + + @Test + fun `read DataRow from TSV string`() { + val tsvText = "a\tb\tc\n1\t2\t3" + val expected = DataFrame.readTsvStr(tsvText).single() + DataRow.readSource(tsvText, Tsv.Options()) shouldBe expected + } + + @Test + fun `read DataRow from JSON string`() { + // A single-element JSON array yields a one-row DataFrame. 
+ val jsonText = """[{"a": 1, "b": 2}]""" + val expected = DataFrame.readJsonStr(jsonText).single() + DataRow.readSource(jsonText) shouldBe expected + } + + @Test + fun `read DataRow from single-row XLSX file`() { + // sample2.xlsx has exactly one data row. + val xlsxFile = File("src/test/resources/sample2.xlsx") + val expected = DataFrame.readExcel(xlsxFile).single() + DataRow.readSource(xlsxFile) shouldBe expected + } + + @Test + fun `read DataRow from JDBC with single-row query`() { + val url = h2Url("guess2_datarow") + DriverManager.getConnection(url).use { conn -> + seed(conn) + val query = "SELECT * FROM Customer WHERE id = 1" + val expected = DataFrame.readSqlQuery(conn, query).single() + DataRow.readSource(conn, Jdbc2.Options(sqlQueryOrTableName = query)) shouldBe expected + } + } + + @Test + fun `read DataRow throws when source has multiple rows`() { + // movies.csv has many rows — DataRow.single() should fail, surfaced as the framework's + // "Unknown DataRow source" since the exception is caught and converted. 
+ val movies = File("../data/movies.csv") + try { + DataRow.readSource(movies) + error("Expected DataRow.readSource to fail on a multi-row CSV") + } catch (_: IllegalArgumentException) { + // expected + } + } + + // endregion } From 7b759f6d3d9e3d673cb378822ee3fdeff3a1898e Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 18 May 2026 21:43:37 +0200 Subject: [PATCH 11/20] put readSource functionality in convert operation --- .../kotlinx/dataframe/impl/api/convert.kt | 56 ++++++++++- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 85 ++++++++++++---- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 96 +++++++++++++++++++ 3 files changed, 211 insertions(+), 26 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt index fbaafd0370..f5c65ea63b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt @@ -22,6 +22,7 @@ import kotlinx.datetime.toStdlibInstant import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowColumnExpression import org.jetbrains.kotlinx.dataframe.RowValueExpression import org.jetbrains.kotlinx.dataframe.api.Convert @@ -30,7 +31,6 @@ import org.jetbrains.kotlinx.dataframe.api.Infer import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.api.asColumn import org.jetbrains.kotlinx.dataframe.api.isValueColumn -import org.jetbrains.kotlinx.dataframe.api.mapIndexed import org.jetbrains.kotlinx.dataframe.api.name import org.jetbrains.kotlinx.dataframe.columns.values import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME @@ -41,13 +41,14 @@ import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException 
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException import org.jetbrains.kotlinx.dataframe.impl.columns.newColumn import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType -import org.jetbrains.kotlinx.dataframe.impl.isSubtypeWithNullabilityOf +import org.jetbrains.kotlinx.dataframe.io.dataFrameReadSourceByType +import org.jetbrains.kotlinx.dataframe.io.readSource import org.jetbrains.kotlinx.dataframe.path +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import org.jetbrains.kotlinx.dataframe.type import java.math.BigDecimal import java.math.BigInteger import java.net.URL -import java.util.Locale import kotlin.math.roundToInt import kotlin.math.roundToLong import kotlin.reflect.KType @@ -65,6 +66,7 @@ import kotlin.time.toJavaInstant import kotlin.time.toKotlinDuration import kotlin.time.toKotlinInstant import kotlin.toBigDecimal +import kotlinx.datetime.Instant as DeprecatedInstant import java.time.Duration as JavaDuration import java.time.Instant as JavaInstant import java.time.LocalDate as JavaLocalDate @@ -73,7 +75,6 @@ import java.time.LocalTime as JavaLocalTime import kotlin.time.Instant as StdlibInstant import kotlin.toBigDecimal as toBigDecimalKotlin import kotlin.toBigInteger as toBigIntegerKotlin -import kotlinx.datetime.Instant as DeprecatedInstant @PublishedApi internal fun Convert.withRowCellImpl( @@ -195,16 +196,61 @@ internal inline fun convert(crossinline converter: (T) -> Any?): TypeConvert private enum class DummyEnum +private val dataFrameReadSourceSupportedClasses by lazy { + dataFrameReadSourceByType.keys.map { it.jvmErasure }.toSet() +} + @Suppress("UNCHECKED_CAST") internal fun createConverter(from: KType, to: KType, options: ParserOptions? = null): TypeConverter? 
{ - if (from.arguments.isNotEmpty() || to.arguments.isNotEmpty()) return null if (from.isMarkedNullable) { val res = createConverter(from.withNullability(false), to, options) ?: return null return { res(it) } } val fromClass = from.jvmErasure val toClass = to.jvmErasure + + // readSource-backed conversions handle target types with type arguments (e.g. `DataFrame<*>`, + // `DataRow<*>`), so they must run before the generic-arguments early-exit below. + if (dataFrameReadSourceByType.any { from.isSubtypeOf(it.key) }) { + val readSources = dataFrameReadSourceByType.entries + .first { from.isSubtypeOf(it.key) }.value + + when (toClass) { + DataFrame::class -> + return convert { source -> + DataFrame.readSource( + source = source, + type = from, + options = null, + formats = readSources, + ) + } + + DataRow::class -> + return convert { source -> + DataRow.readSource( + source = source, + type = from, + options = null, + formats = readSources, + ) + } + + DataFrameSchema::class -> + return convert { source -> + DataFrameSchema.readSource( + source = source, + type = from, + options = null, + formats = readSources, + ) + } + } + } + return when { + from.arguments.isNotEmpty() || to.arguments.isNotEmpty() -> null + fromClass == toClass -> TypeConverterIdentity // kotlin.time.Duration is a value class, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 141f950e29..f004e96939 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -89,6 +89,7 @@ public data class DataSourceInfo( * resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource * to be detected here. 
*/ +@PublishedApi internal val newSupportedFormats: List by lazy { ServiceLoader.load(DataFrameReadSource::class.java) .toList() @@ -96,6 +97,28 @@ internal val newSupportedFormats: List by lazy { .sortedBy { it.testOrder } } +internal val dataFrameReadSourceByType: Map> by lazy { + buildMap> { + newSupportedFormats.forEach { format -> + format.supportedTypes.forEach { type -> + getOrPut(type) { mutableListOf() }.let { + if (format !in it) it += format + } + + // special String -> URL case + if (type == typeOf()) { + getOrPut(typeOf()) { mutableListOf() }.let { + if (format !in it) it += format + } + } + } + } + values.forEach { + it.sortBy { it.testOrder } + } + } +} + /** * Shared dispatch loop for [readDataFrameImpl] and [readDataFrameSchemaImpl]: handles String→URL * normalization, InputStream buffering, sorted iteration, and error aggregation. The per-format read @@ -170,7 +193,12 @@ internal fun readSourceImpl( * entries in `guess.kt` that use the older [SupportedDataFrameFormat] system. Once the legacy entries are * retired, this can be renamed to `read`. */ -public fun DataFrame.Companion.readSource(source: Any, type: KType, options: DataFrameReadOptions? = null): AnyFrame = +public fun DataFrame.Companion.readSource( + source: Any, + type: KType, + options: DataFrameReadOptions? = null, + formats: List = newSupportedFormats, +): AnyFrame = readSourceImpl( source = source, sourceInfo = DataSourceInfo( @@ -179,7 +207,7 @@ public fun DataFrame.Companion.readSource(source: Any, type: KType, options: Dat mimeType = null, // TODO, Apache Tika? ), options = options, - formats = newSupportedFormats, + formats = formats, resultKind = "DataFrame", readOrNull = DataFrameReadSource::readDataFrameOrNull, ) @@ -187,9 +215,15 @@ public fun DataFrame.Companion.readSource(source: Any, type: KType, options: Dat public inline fun DataRow.Companion.readSource( source: R, options: DataFrameReadOptions? 
= null, -): AnyRow = readSource(source = source, type = typeOf(), options = options) + formats: List = newSupportedFormats, +): AnyRow = readSource(source = source, type = typeOf(), options = options, formats = formats) -public fun DataRow.Companion.readSource(source: Any, type: KType, options: DataFrameReadOptions? = null): AnyRow = +public fun DataRow.Companion.readSource( + source: Any, + type: KType, + options: DataFrameReadOptions? = null, + formats: List = newSupportedFormats, +): AnyRow = readSourceImpl( source = source, sourceInfo = DataSourceInfo( @@ -198,7 +232,7 @@ public fun DataRow.Companion.readSource(source: Any, type: KType, options: DataF mimeType = null, // TODO, Apache Tika? ), options = options, - formats = newSupportedFormats, + formats = formats, resultKind = "DataRow", readOrNull = { source, sourceInfo, options -> readDataFrameOrNull(source, sourceInfo, options)?.single() @@ -208,7 +242,14 @@ public fun DataRow.Companion.readSource(source: Any, type: KType, options: DataF public inline fun DataFrame.Companion.readSource( source: R, options: DataFrameReadOptions? = null, -): AnyFrame = readSource(source = source, type = typeOf(), options = options) + formats: List = newSupportedFormats, +): AnyFrame = + readSource( + source = source, + type = typeOf(), + options = options, + formats = formats, + ) /** * Schema-only counterpart of [DataFrame.Companion.readSource]: dispatches through every registered @@ -220,6 +261,7 @@ public fun DataFrameSchema.Companion.readSource( source: Any, type: KType, options: DataFrameReadOptions? = null, + formats: List = newSupportedFormats, ): DataFrameSchema = readSourceImpl( source = source, @@ -229,31 +271,23 @@ public fun DataFrameSchema.Companion.readSource( mimeType = null, // TODO, Apache Tika? 
), options = options, - formats = newSupportedFormats, + formats = formats, resultKind = "DataFrameSchema", readOrNull = DataFrameReadSource::readDataFrameSchemaOrNull, ) -internal fun readDataFrameSchemaImpl( - source: Any, - sourceInfo: DataSourceInfo, +public inline fun DataFrameSchema.Companion.readSource( + source: R, options: DataFrameReadOptions? = null, formats: List = newSupportedFormats, ): DataFrameSchema = - readSourceImpl( + readSource( source = source, - sourceInfo = sourceInfo, + type = typeOf(), options = options, formats = formats, - resultKind = "DataFrameSchema", - readOrNull = DataFrameReadSource::readDataFrameSchemaOrNull, ) -public inline fun DataFrameSchema.Companion.readSource( - source: R, - options: DataFrameReadOptions? = null, -): DataFrameSchema = readSource(source = source, type = typeOf(), options = options) - /** * Code-generation counterpart of [DataFrame.Companion.readSource]: dispatches through every registered * [DataFrameReadSource] and returns a [CodeString] containing the generated `@DataSchema` interface @@ -270,6 +304,7 @@ public fun CodeString.Companion.readSource( type: KType, name: String, options: DataFrameReadOptions? = null, + formats: List = newSupportedFormats, ): CodeString = readSourceImpl( source = source, @@ -279,7 +314,7 @@ public fun CodeString.Companion.readSource( mimeType = null, // TODO, Apache Tika? ), options = options, - formats = newSupportedFormats, + formats = formats, resultKind = "CodeString", readOrNull = { src, info, opts -> readDataSchemaCodeOrNull(src, info, name, opts) @@ -290,7 +325,15 @@ public inline fun CodeString.Companion.readSource( source: R, name: String, options: DataFrameReadOptions? 
= null, -): CodeString = readSource(source = source, type = typeOf(), name = name, options = options) + formats: List = newSupportedFormats, +): CodeString = + readSource( + source = source, + type = typeOf(), + name = name, + options = options, + formats = formats, + ) internal fun Any.extensionOrNull(): String? = when (this) { diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index b3890ae66c..990ccefd17 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -8,8 +8,13 @@ import org.apache.poi.ss.usermodel.WorkbookFactory import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.CodeString +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.named import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.single +import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.io.db.H2 import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import org.junit.Test @@ -537,4 +542,95 @@ class Guess2 { } // endregion + + // region convert API integration — convert { col }.to() + // + // Frame columns are typed by their schema, so each column being converted must contain sources of the + // same shape. Mixing, say, a CSV-shaped source and a JSON-shaped source in the same column would yield + // a FrameColumn with no coherent single schema — these tests keep each column homogeneous and put + // differently-shaped sources into separate columns. 
+ + @Test + fun `convert column of CSV files to DataFrame`() { + // Two cells, both pointing at the same CSV → uniform shape in the resulting FrameColumn. + val csvFile = File("../data/movies.csv") + val df = dataFrameOf("source")(csvFile, csvFile) + + val converted = df.convert("source").to>() + + val expected = DataFrame.readCsv(csvFile) + converted["source"][0] shouldBe expected + converted["source"][1] shouldBe expected + } + + @Test + fun `convert column of CSV files to DataFrameSchema`() { + val csvFile = File("../data/movies.csv") + val df = dataFrameOf("source")(csvFile, csvFile) + + val converted = df.convert("source").to() + + val expected = DataFrame.readCsv(csvFile).schema() + converted["source"][0] shouldBe expected + converted["source"][1] shouldBe expected + } + + @Test + fun `convert column of single-row XLSX files to DataRow`() { + // sample2.xlsx has exactly one data row, so .to>() works for each cell. + val xlsxFile = File("src/test/resources/sample2.xlsx") + val df = dataFrameOf("source")(xlsxFile, xlsxFile) + + val converted = df.convert("source").to>() + + val expected = DataFrame.readExcel(xlsxFile).single() + converted["source"][0] shouldBe expected + converted["source"][1] shouldBe expected + } + + @Test + fun `convert column of String content to DataFrame`() { + // Multiple parallel JSON content strings (same shape) → uniform FrameColumn. + val text = """[{"a": 1, "b": 2}]""" + val df = dataFrameOf("source")(text, text) + + val converted = df.convert("source").to>() + + val expected = DataFrame.readJsonStr(text) + converted["source"][0] shouldBe expected + converted["source"][1] shouldBe expected + } + + @Test + fun `convert two homogeneous source columns at once`() { + // Each column is internally uniform: csvCol has CSV-shaped cells, jsonCol has JSON-shaped cells. + // The result is two FrameColumns, each with its own coherent schema. 
+ val csvFile = File("../data/movies.csv") + val jsonFile = File("../data/participants.json") + val df = dataFrameOf("csvCol", "jsonCol")(csvFile, jsonFile, csvFile, jsonFile) + + val converted = df.convert("csvCol", "jsonCol").to>() + + val expectedCsv = DataFrame.readCsv(csvFile) + val expectedJson = DataFrame.readJson(jsonFile) + converted["csvCol"][0] shouldBe expectedCsv + converted["csvCol"][1] shouldBe expectedCsv + converted["jsonCol"][0] shouldBe expectedJson + converted["jsonCol"][1] shouldBe expectedJson + } + + @Test + fun `convert column of URLs to DataFrame`() { + // Two URLs pointing at the same JSON file → uniform schema in the FrameColumn. + val jsonUrl = File("../data/participants.json").toURI().toURL() + val urls = columnOf(jsonUrl, jsonUrl) named "source" + val df = urls.toDataFrame() + + val converted = df.convert("source").to>() + val expected = DataFrame.readJson(jsonUrl) + converted["source"][0] shouldBe expected + converted["source"][1] shouldBe expected + } + + // endregion } From 1931297742a78f091bc53b075f9b0d1326bf1862 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 19 May 2026 14:51:37 +0200 Subject: [PATCH 12/20] using apache tika to sniff mime types --- core/build.gradle.kts | 1 + .../kotlinx/dataframe/impl/api/convert.kt | 2 +- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 94 ++++++++++++++----- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 8 ++ .../kotlinx/dataframe/io/arrowReading.kt | 5 + .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 12 ++- .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 12 ++- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 2 + .../jetbrains/kotlinx/dataframe/io/json.kt | 18 +++- .../jetbrains/kotlinx/dataframe/io/OpenApi.kt | 15 +++ gradle/libs.versions.toml | 2 + 11 files changed, 133 insertions(+), 38 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 410332d914..07bdd2f173 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -59,6 +59,7 @@ dependencies { 
implementation(libs.commonsIo) implementation(libs.fastDoubleParser) + implementation(libs.tika) api(libs.kotlin.datetimeJvm) implementation(libs.kotlinpoet) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt index f5c65ea63b..74a7e6564a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt @@ -66,7 +66,6 @@ import kotlin.time.toJavaInstant import kotlin.time.toKotlinDuration import kotlin.time.toKotlinInstant import kotlin.toBigDecimal -import kotlinx.datetime.Instant as DeprecatedInstant import java.time.Duration as JavaDuration import java.time.Instant as JavaInstant import java.time.LocalDate as JavaLocalDate @@ -75,6 +74,7 @@ import java.time.LocalTime as JavaLocalTime import kotlin.time.Instant as StdlibInstant import kotlin.toBigDecimal as toBigDecimalKotlin import kotlin.toBigInteger as toBigIntegerKotlin +import kotlinx.datetime.Instant as DeprecatedInstant @PublishedApi internal fun Convert.withRowCellImpl( diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index f004e96939..858cee018a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -1,5 +1,10 @@ package org.jetbrains.kotlinx.dataframe.io +import org.apache.tika.detect.DefaultDetector +import org.apache.tika.io.TikaInputStream +import org.apache.tika.metadata.Metadata +import org.apache.tika.metadata.TikaCoreProperties +import org.apache.tika.mime.MediaType import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.DataFrame @@ -12,12 +17,14 @@ import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import 
java.io.ByteArrayInputStream import java.io.File import java.io.FileNotFoundException +import java.io.IOException import java.io.InputStream import java.net.URI import java.net.URL import java.nio.file.Path import java.util.ServiceLoader import kotlin.io.path.extension +import kotlin.io.path.name import kotlin.reflect.KType import kotlin.reflect.full.withNullability import kotlin.reflect.typeOf @@ -80,9 +87,14 @@ public interface DataFrameReadSource { public data class DataSourceInfo( public val kType: KType, public val extension: String? = null, - // TODO, Apache Tika? public val mimeType: String? = null, -) +) { + init { + if (mimeType != null) { + println() + } + } +} /** * NOTE: Needs to have fully qualified name in @@ -129,7 +141,7 @@ internal val dataFrameReadSourceByType: Map> by */ internal fun readSourceImpl( source: Any, - sourceInfo: DataSourceInfo, + sourceType: KType, options: DataFrameReadOptions?, formats: List, resultKind: String, @@ -144,7 +156,7 @@ internal fun readSourceImpl( if (url != null) { return readSourceImpl( source = url, - sourceInfo = sourceInfo.copy(kType = typeOf()), + sourceType = typeOf(), options = options, formats = formats, resultKind = resultKind, @@ -166,6 +178,12 @@ internal fun readSourceImpl( else -> source } + val sourceInfo = DataSourceInfo( + kType = sourceType, + extension = getSource().extensionOrNull(), + mimeType = getSource().mimeTypeOrNull(), + ) + val tries = mutableMapOf() formats.sortedBy { it.testOrder }.forEach { if (!it.acceptsSource(sourceInfo, options)) return@forEach @@ -201,11 +219,7 @@ public fun DataFrame.Companion.readSource( ): AnyFrame = readSourceImpl( source = source, - sourceInfo = DataSourceInfo( - kType = type.withNullability(false), - extension = source.extensionOrNull(), - mimeType = null, // TODO, Apache Tika? 
- ), + sourceType = type.withNullability(false), options = options, formats = formats, resultKind = "DataFrame", @@ -226,11 +240,7 @@ public fun DataRow.Companion.readSource( ): AnyRow = readSourceImpl( source = source, - sourceInfo = DataSourceInfo( - kType = type.withNullability(false), - extension = source.extensionOrNull(), - mimeType = null, // TODO, Apache Tika? - ), + sourceType = type.withNullability(false), options = options, formats = formats, resultKind = "DataRow", @@ -265,11 +275,7 @@ public fun DataFrameSchema.Companion.readSource( ): DataFrameSchema = readSourceImpl( source = source, - sourceInfo = DataSourceInfo( - kType = type.withNullability(false), - extension = source.extensionOrNull(), - mimeType = null, // TODO, Apache Tika? - ), + sourceType = type.withNullability(false), options = options, formats = formats, resultKind = "DataFrameSchema", @@ -308,11 +314,7 @@ public fun CodeString.Companion.readSource( ): CodeString = readSourceImpl( source = source, - sourceInfo = DataSourceInfo( - kType = type.withNullability(false), - extension = source.extensionOrNull(), - mimeType = null, // TODO, Apache Tika? - ), + sourceType = type.withNullability(false), options = options, formats = formats, resultKind = "CodeString", @@ -335,6 +337,48 @@ public inline fun CodeString.Companion.readSource( formats = formats, ) +private val tikaDetector by lazy { DefaultDetector() } + +internal fun Any.mimeTypeOrNull(): String? 
{ + val inputStream = try { + when (this) { + is Path -> TikaInputStream.get(this) + + is File -> + @Suppress("DEPRECATION") + TikaInputStream.get(this) + + is URL -> TikaInputStream.get(this) + + is InputStream -> TikaInputStream.get(this) + + is ByteArray -> TikaInputStream.get(this) + + else -> null + } + } catch (_: IOException) { + null + } ?: return null + + val metadata = Metadata().apply { + if (inputStream.hasFile()) { + add(TikaCoreProperties.RESOURCE_NAME_KEY, inputStream.path.name) + } + } + return try { + val detected = tikaDetector.detect(inputStream, metadata) + return when { + detected == MediaType.OCTET_STREAM -> null + detected == MediaType.TEXT_PLAIN -> null + detected == MediaType.EMPTY -> null + detected.toString().isEmpty() -> null + else -> detected.toString() + } + } catch (_: IOException) { + null + } +} + internal fun Any.extensionOrNull(): String? = when (this) { is Path -> extension @@ -350,7 +394,7 @@ internal fun Any.extensionOrNull(): String? = } else -> null - } + }?.lowercase() /** * Non-throwing variant of [asUrl]: returns the [URL] iff [string] is a recognized URL (`http`/`https`/`ftp`) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index 990ccefd17..d4b9503a95 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -243,13 +243,21 @@ class Guess2 { // DataSource — opens a fresh connection each call (DataSource.readDataFrame closes it via `use`). val dataSource = object : DataSource { override fun getConnection() = DriverManager.getConnection(url) + override fun getConnection(u: String?, p: String?) = DriverManager.getConnection(url) + override fun getLogWriter() = null + override fun setLogWriter(out: java.io.PrintWriter?) 
{} + override fun setLoginTimeout(seconds: Int) {} + override fun getLoginTimeout() = 0 + override fun getParentLogger() = throw UnsupportedOperationException() + override fun unwrap(iface: Class?): T = throw UnsupportedOperationException() + override fun isWrapperFor(iface: Class<*>?) = false } DataFrame.readSource(dataSource, tableOpts) shouldBe expected diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt index be208cb93e..bc4f1b78df 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt @@ -226,11 +226,16 @@ public class Parquet : DataFrameReadSource { public companion object { internal const val EXTENSION: String = "parquet" + internal val MIME_TYPES = setOf( + "application/x-parquet", + "application/parquet", + ) } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false + if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index e9fd8c71f0..4e77e63d50 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -61,14 +61,18 @@ public class Csv : DataFrameReadSource { setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) public companion object { - internal const val EXTENSION: String = "csv" - internal const val MIME_TYPE: String = "text/csv" + internal val EXTENSIONS = 
setOf("csv", "zip", "gz") + internal val MIME_TYPES = setOf( + "text/csv", + "application/zip", + "application/gzip", + ) } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false - if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false - if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false + if (sourceInfo.extension != null && sourceInfo.extension !in EXTENSIONS) return false + if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index 48ae4aca50..96e026ab69 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -61,14 +61,18 @@ public class Tsv : DataFrameReadSource { setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) public companion object { - internal const val EXTENSION: String = "tsv" - internal const val MIME_TYPE: String = "text/tab-separated-values" + internal val EXTENSIONS = setOf("tsv", "zip", "gz") + internal val MIME_TYPE = setOf( + "text/tab-separated-values", + "application/zip", + "application/gzip", + ) } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false - if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false - if (sourceInfo.mimeType?.lowercase()?.equals(MIME_TYPE) == false) return false + if (sourceInfo.extension != null && sourceInfo.extension !in EXTENSIONS) return false + if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPE) return false return supportedTypes.any { 
sourceInfo.kType.isSubtypeOf(it) } } diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index dcb683319d..543b30b333 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -97,6 +97,8 @@ public class ExcelNEW : DataFrameReadSource { internal val MIME_TYPES: Set = setOf( "application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/x-tika-ooxml", + "application/x-tika-msoffice", ) } diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 431fdbe427..7a1e9f8532 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -55,10 +55,20 @@ public class Json : DataFrameReadSource { typeOf(), ) + public companion object { + internal const val EXTENSION = "json" + internal val MIME_TYPES = setOf( + "application/json", + "application/x-json", + "text/json", + "text/x-json", + ) + } + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false - if (sourceInfo.extension?.lowercase()?.equals("json") == false) return false - if (sourceInfo.mimeType?.lowercase()?.equals("application/json") == false) return false + if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false + if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } @@ -121,8 +131,8 @@ public class Json : DataFrameReadSource { trim().let { it.isEmpty() || !( - it.startsWith('{') && it.endsWith('}') || - 
it.startsWith('[') && it.endsWith(']') + (it.startsWith('{') && it.endsWith('}')) || + (it.startsWith('[') && it.endsWith(']')) ) } } diff --git a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt index 012caa049c..ef7e087916 100644 --- a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt +++ b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt @@ -46,12 +46,27 @@ public class OpenApi2 : DataFrameReadSource { public companion object { internal val EXTENSIONS: Set = setOf("yaml", "yml", "json") + internal val MIME_TYPES = setOf( + "application/vnd.oai.openapi", + "application/vnd.oai.openapi+json", + "application/vnd.oai.openapi.yaml", + "application/vnd.oai.openapi+yaml", + "text/x-yaml", + "text/yaml", + "application/x-yaml", + "application/yaml", + "application/x-json", + "application/json", + "text/x-json", + "text/json", + ) } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false val ext = sourceInfo.extension?.lowercase() if (ext != null && ext !in EXTENSIONS) return false + if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index ed04b6e669..5f5cd5aa86 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -34,6 +34,7 @@ commonsCsv = "1.14.1" commonsCompress = "1.28.0" commonsIo = "2.21.0" commonsStatistics = "1.2" +tika = "3.3.0" serialization = "1.11.0" poi = "5.5.1" mariadb = "3.5.8" @@ -100,6 +101,7 @@ commonsCsv = { group = "org.apache.commons", name = "commons-csv", version.ref = commonsCompress = { group = "org.apache.commons", name = "commons-compress", version.ref 
= "commonsCompress" } commonsIo = { group = "commons-io", name = "commons-io", version.ref = "commonsIo" } commonsStatisticsDescriptive = { group = "org.apache.commons", name = "commons-statistics-descriptive", version.ref = "commonsStatistics" } +tika = { group = "org.apache.tika", name = "tika-core", version.ref = "tika" } # Serialization serialization-core = { group = "org.jetbrains.kotlinx", name = "kotlinx-serialization-core", version.ref = "serialization" } From 0033315fa696e88db691f951e535cfda2e791e46 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 19 May 2026 14:58:48 +0200 Subject: [PATCH 13/20] api dump --- core/api/core.api | 47 ++++++++ dataframe-arrow/api/dataframe-arrow.api | 88 ++++++++++++++ dataframe-csv/api/dataframe-csv.api | 108 ++++++++++++++++++ dataframe-excel/api/dataframe-excel.api | 41 +++++++ dataframe-jdbc/api/dataframe-jdbc.api | 36 ++++++ dataframe-json/api/dataframe-json.api | 34 ++++++ .../api/dataframe-openapi-generator.api | 36 ++++++ 7 files changed, 390 insertions(+) diff --git a/core/api/core.api b/core/api/core.api index 790ce8fa29..6ff4138858 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -663,6 +663,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/ChunkedKt { } public final class org/jetbrains/kotlinx/dataframe/api/CodeString { + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/api/CodeString$Companion; public static final synthetic fun box-impl (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/api/CodeString; public static fun constructor-impl (Ljava/lang/String;)Ljava/lang/String; public fun equals (Ljava/lang/Object;)Z @@ -676,6 +677,9 @@ public final class org/jetbrains/kotlinx/dataframe/api/CodeString { public final synthetic fun unbox-impl ()Ljava/lang/String; } +public final class org/jetbrains/kotlinx/dataframe/api/CodeString$Companion { +} + public abstract interface class org/jetbrains/kotlinx/dataframe/api/ColColumnsSelectionDsl { public fun col 
(Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/columns/ColumnAccessor; public fun col (Ljava/lang/String;I)Lorg/jetbrains/kotlinx/dataframe/columns/SingleColumn; @@ -6028,6 +6032,37 @@ public final class org/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData$Companio public static synthetic fun tableDefinitions$default (Lorg/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData$Companion;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData; } +public abstract interface class org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +} + +public abstract interface class org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public abstract fun acceptsSource (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public abstract fun getSupportedTypes ()Ljava/util/Set; + public abstract fun getTestOrder ()I + public abstract fun readDataFrameOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readDataFrameOrNull$default (Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadSource;Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public static synthetic fun readDataFrameSchemaOrNull$default (Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadSource;Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun 
readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public static synthetic fun readDataSchemaCodeOrNull-myXLQ2E$default (Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadSource;Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;ILjava/lang/Object;)Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/DataSourceInfo { + public fun (Lkotlin/reflect/KType;Ljava/lang/String;Ljava/lang/String;)V + public synthetic fun (Lkotlin/reflect/KType;Ljava/lang/String;Ljava/lang/String;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Lkotlin/reflect/KType; + public final fun component2 ()Ljava/lang/String; + public final fun component3 ()Ljava/lang/String; + public final fun copy (Lkotlin/reflect/KType;Ljava/lang/String;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lkotlin/reflect/KType;Ljava/lang/String;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo; + public fun equals (Ljava/lang/Object;)Z + public final fun getExtension ()Ljava/lang/String; + public final fun getKType ()Lkotlin/reflect/KType; + public final fun getMimeType ()Ljava/lang/String; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/io/DisplayConfiguration { public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/DisplayConfiguration$Companion; public synthetic fun (Ljava/lang/Integer;Ljava/lang/Integer;ILkotlin/jvm/functions/Function3;Ljava/lang/String;ZZZZZILkotlin/jvm/internal/DefaultConstructorMarker;)V @@ -6078,6 +6113,18 @@ public final class 
org/jetbrains/kotlinx/dataframe/io/DisplayConfiguration$Compa public final fun getDEFAULT ()Lorg/jetbrains/kotlinx/dataframe/io/DisplayConfiguration; } +public final class org/jetbrains/kotlinx/dataframe/io/Guess2Kt { + public static final fun getNewSupportedFormats ()Ljava/util/List; + public static final fun readSource (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/Object;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readSource (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/Object;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readSource (Lorg/jetbrains/kotlinx/dataframe/api/CodeString$Companion;Ljava/lang/Object;Lkotlin/reflect/KType;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;Ljava/util/List;)Ljava/lang/String; + public static final fun readSource (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/lang/Object;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public static synthetic fun readSource$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/Object;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readSource$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/Object;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readSource$default 
(Lorg/jetbrains/kotlinx/dataframe/api/CodeString$Companion;Ljava/lang/Object;Lkotlin/reflect/KType;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;Ljava/util/List;ILjava/lang/Object;)Ljava/lang/String; + public static synthetic fun readSource$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/lang/Object;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; +} + public final class org/jetbrains/kotlinx/dataframe/io/GuessKt { public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; diff --git a/dataframe-arrow/api/dataframe-arrow.api b/dataframe-arrow/api/dataframe-arrow.api index c3f0a80c3b..4a098bd002 100644 --- a/dataframe-arrow/api/dataframe-arrow.api +++ b/dataframe-arrow/api/dataframe-arrow.api @@ -9,6 +9,64 @@ public final class org/jetbrains/kotlinx/dataframe/io/ArrowFeather : org/jetbrai public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } +public final class org/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW : org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Companion; + public fun ()V + public fun acceptsSource (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public fun getSupportedTypes ()Ljava/util/Set; + public fun getTestOrder ()I + public fun readDataFrameOrNull 
(Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Companion { +} + +public final class org/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { + public fun ()V + public fun (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)V + public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; + public final fun copy (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Options; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Options;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Options; + public fun equals (Ljava/lang/Object;)Z + public final fun getNullability ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/ArrowIPC : org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public static final field Companion 
Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$Companion; + public fun ()V + public fun acceptsSource (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public fun getSupportedTypes ()Ljava/util/Set; + public fun getTestOrder ()I + public fun readDataFrameOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/ArrowIPC$Companion { +} + +public final class org/jetbrains/kotlinx/dataframe/io/ArrowIPC$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { + public fun ()V + public fun (Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)V + public synthetic fun (Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Lorg/apache/arrow/memory/RootAllocator; + public final fun component2 ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; + public final fun copy (Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$Options; + public static synthetic fun copy$default 
(Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$Options;Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$Options; + public fun equals (Ljava/lang/Object;)Z + public final fun getAllocator ()Lorg/apache/arrow/memory/RootAllocator; + public final fun getNullability ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/io/ArrowReadingKt { public static final fun readArrow (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/arrow/vector/ipc/ArrowReader;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readArrow$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/arrow/vector/ipc/ArrowReader;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; @@ -323,3 +381,33 @@ public final class org/jetbrains/kotlinx/dataframe/io/ConvertingMismatch$Widenin public fun toString ()Ljava/lang/String; } +public final class org/jetbrains/kotlinx/dataframe/io/Parquet : org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Parquet$Companion; + public fun ()V + public fun acceptsSource (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public fun getSupportedTypes ()Ljava/util/Set; + public fun getTestOrder ()I + public fun readDataFrameOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull 
(Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/Parquet$Companion { +} + +public final class org/jetbrains/kotlinx/dataframe/io/Parquet$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { + public fun ()V + public fun (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;J)V + public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;JILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; + public final fun component2 ()J + public final fun copy (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;J)Lorg/jetbrains/kotlinx/dataframe/io/Parquet$Options; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Parquet$Options;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;JILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Parquet$Options; + public fun equals (Ljava/lang/Object;)Z + public final fun getBatchSize ()J + public final fun getNullability ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + diff --git a/dataframe-csv/api/dataframe-csv.api b/dataframe-csv/api/dataframe-csv.api index a9a964783c..d8c4ca4133 100644 --- a/dataframe-csv/api/dataframe-csv.api +++ b/dataframe-csv/api/dataframe-csv.api @@ -1,3 +1,57 @@ +public final class org/jetbrains/kotlinx/dataframe/io/Csv : org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public static final field Companion 
Lorg/jetbrains/kotlinx/dataframe/io/Csv$Companion; + public fun ()V + public fun acceptsSource (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public fun getSupportedTypes ()Ljava/util/Set; + public fun getTestOrder ()I + public fun readDataFrameOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/Csv$Companion { +} + +public final class org/jetbrains/kotlinx/dataframe/io/Csv$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { + public fun ()V + public fun (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)V + public synthetic fun (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()C + public final fun component10 ()Z + public final fun component11 ()C + public final fun component12 ()Z + public final fun component13 ()Z + public final fun component14 ()Z + public final fun component2 ()Ljava/util/List; + public final fun component3 ()Ljava/nio/charset/Charset; + public final fun component4 ()Ljava/util/Map; + public final fun component5 ()J + public final fun component6 ()Ljava/lang/Long; 
+ public final fun component7 ()Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; + public final fun component8 ()Z + public final fun component9 ()Z + public final fun copy (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/io/Csv$Options; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Csv$Options;CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Csv$Options; + public fun equals (Ljava/lang/Object;)Z + public final fun getAllowMissingColumns ()Z + public final fun getCharset ()Ljava/nio/charset/Charset; + public final fun getColTypes ()Ljava/util/Map; + public final fun getDelimiter ()C + public final fun getHeader ()Ljava/util/List; + public final fun getIgnoreEmptyLines ()Z + public final fun getIgnoreExcessColumns ()Z + public final fun getIgnoreSurroundingSpaces ()Z + public final fun getParseParallel ()Z + public final fun getParserOptions ()Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; + public final fun getQuote ()C + public final fun getReadLines ()Ljava/lang/Long; + public final fun getSkipLines ()J + public final fun getTrimInsideQuoted ()Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/io/CsvDeephaven : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat { public fun ()V public fun (C)V @@ -121,6 +175,60 @@ public final class org/jetbrains/kotlinx/dataframe/io/ToTsvStrKt { public static synthetic fun toTsvStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ZCLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String; } +public final class 
org/jetbrains/kotlinx/dataframe/io/Tsv : org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Tsv$Companion; + public fun ()V + public fun acceptsSource (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public fun getSupportedTypes ()Ljava/util/Set; + public fun getTestOrder ()I + public fun readDataFrameOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/Tsv$Companion { +} + +public final class org/jetbrains/kotlinx/dataframe/io/Tsv$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { + public fun ()V + public fun (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)V + public synthetic fun (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()C + public final fun component10 ()Z + public final fun component11 ()C + public final fun component12 ()Z + public final fun component13 ()Z + public final fun component14 ()Z + public final fun component2 ()Ljava/util/List; + public final fun component3 
()Ljava/nio/charset/Charset; + public final fun component4 ()Ljava/util/Map; + public final fun component5 ()J + public final fun component6 ()Ljava/lang/Long; + public final fun component7 ()Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; + public final fun component8 ()Z + public final fun component9 ()Z + public final fun copy (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/io/Tsv$Options; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Tsv$Options;CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Tsv$Options; + public fun equals (Ljava/lang/Object;)Z + public final fun getAllowMissingColumns ()Z + public final fun getCharset ()Ljava/nio/charset/Charset; + public final fun getColTypes ()Ljava/util/Map; + public final fun getDelimiter ()C + public final fun getHeader ()Ljava/util/List; + public final fun getIgnoreEmptyLines ()Z + public final fun getIgnoreExcessColumns ()Z + public final fun getIgnoreSurroundingSpaces ()Z + public final fun getParseParallel ()Z + public final fun getParserOptions ()Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; + public final fun getQuote ()C + public final fun getReadLines ()Ljava/lang/Long; + public final fun getSkipLines ()J + public final fun getTrimInsideQuoted ()Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/io/TsvDeephaven : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat { public fun ()V public fun (C)V diff --git a/dataframe-excel/api/dataframe-excel.api b/dataframe-excel/api/dataframe-excel.api index 06541cf3c9..0320ffc31d 100644 --- a/dataframe-excel/api/dataframe-excel.api +++ b/dataframe-excel/api/dataframe-excel.api @@ -9,6 +9,47 
@@ public final class org/jetbrains/kotlinx/dataframe/io/Excel : org/jetbrains/kotl public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } +public final class org/jetbrains/kotlinx/dataframe/io/ExcelNEW : org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$Companion; + public fun ()V + public fun acceptsSource (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public fun getSupportedTypes ()Ljava/util/Set; + public fun getTestOrder ()I + public fun readDataFrameOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/ExcelNEW$Companion { +} + +public final class org/jetbrains/kotlinx/dataframe/io/ExcelNEW$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { + public synthetic fun (Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILkotlin/jvm/internal/DefaultConstructorMarker;)V + public synthetic fun (Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZLkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 
()Ljava/lang/String; + public final fun component2 ()I + public final fun component3 ()Ljava/lang/String; + public final fun component4-358K8uM ()Ljava/lang/String; + public final fun component5 ()Ljava/lang/Integer; + public final fun component6 ()Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy; + public final fun component7 ()Z + public final fun component8 ()Z + public final fun copy-vOPuZIo (Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZ)Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$Options; + public static synthetic fun copy-vOPuZIo$default (Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$Options;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$Options; + public fun equals (Ljava/lang/Object;)Z + public final fun getColumns ()Ljava/lang/String; + public final fun getFirstRowIsHeader ()Z + public final fun getNameRepairStrategy ()Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy; + public final fun getParseEmptyAsNull ()Z + public final fun getRowsCount ()Ljava/lang/Integer; + public final fun getSheetName ()Ljava/lang/String; + public final fun getSkipRows ()I + public final fun getStringColumns-358K8uM ()Ljava/lang/String; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/io/FormattingOptions { public fun (Ljava/lang/String;Lorg/apache/poi/ss/usermodel/DataFormatter;)V public synthetic fun (Ljava/lang/String;Lorg/apache/poi/ss/usermodel/DataFormatter;ILkotlin/jvm/internal/DefaultConstructorMarker;)V diff --git a/dataframe-jdbc/api/dataframe-jdbc.api b/dataframe-jdbc/api/dataframe-jdbc.api index 43b8f1cf82..4b3c3fc4e3 100644 --- a/dataframe-jdbc/api/dataframe-jdbc.api +++ b/dataframe-jdbc/api/dataframe-jdbc.api @@ -25,6 +25,42 @@ public final class 
org/jetbrains/kotlinx/dataframe/io/Jdbc : org/jetbrains/kotli public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } +public final class org/jetbrains/kotlinx/dataframe/io/Jdbc2 : org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public fun ()V + public fun acceptsSource (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public fun getSupportedTypes ()Ljava/util/Set; + public fun getTestOrder ()I + public fun readDataFrameOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/Jdbc2$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { + public fun ()V + public fun (Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;)V + public synthetic fun (Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Ljava/lang/String; + public final fun component2 ()Ljava/lang/Integer; + public final fun component3 ()Z + public final fun component4 ()Lorg/jetbrains/kotlinx/dataframe/io/db/DbType; + public final fun component5 ()Z + public final fun 
component6 ()Lkotlin/jvm/functions/Function1; + public final fun component7 ()Ljava/sql/Connection; + public final fun copy (Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;)Lorg/jetbrains/kotlinx/dataframe/io/Jdbc2$Options; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Jdbc2$Options;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Jdbc2$Options; + public fun equals (Ljava/lang/Object;)Z + public final fun getConfigureStatement ()Lkotlin/jvm/functions/Function1; + public final fun getDbType ()Lorg/jetbrains/kotlinx/dataframe/io/db/DbType; + public final fun getInferNullability ()Z + public final fun getLimit ()Ljava/lang/Integer; + public final fun getResultSetConnection ()Ljava/sql/Connection; + public final fun getSqlQueryOrTableName ()Ljava/lang/String; + public final fun getStrictValidation ()Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/io/JdbcSchemaKt { public static final fun buildCodeForDB (Ljava/net/URL;Ljava/lang/String;)Ljava/lang/String; public static final fun getDatabaseCodeGenReader (Lorg/jetbrains/kotlinx/dataframe/codeGen/CodeGenerator$Companion;)Lkotlin/jvm/functions/Function2; diff --git a/dataframe-json/api/dataframe-json.api b/dataframe-json/api/dataframe-json.api index 368faf4a12..46885ef8e5 100644 --- a/dataframe-json/api/dataframe-json.api +++ b/dataframe-json/api/dataframe-json.api @@ -40,6 +40,40 @@ public final class org/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic : jav public static fun values ()[Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; } +public final class org/jetbrains/kotlinx/dataframe/io/Json : org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public 
static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Json$Companion; + public fun ()V + public fun acceptsSource (Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public fun getSupportedTypes ()Ljava/util/Set; + public fun getTestOrder ()I + public fun readDataFrameOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/Json$Companion { +} + +public final class org/jetbrains/kotlinx/dataframe/io/Json$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { + public fun ()V + public fun (Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;Z)V + public synthetic fun (Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Ljava/util/List; + public final fun component2 ()Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; + public final fun component3 ()Ljava/util/List; + public final fun component4 ()Z + public final fun copy (Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;Z)Lorg/jetbrains/kotlinx/dataframe/io/Json$Options; + public static synthetic fun copy$default 
(Lorg/jetbrains/kotlinx/dataframe/io/Json$Options;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Json$Options; + public fun equals (Ljava/lang/Object;)Z + public final fun getHeader ()Ljava/util/List; + public final fun getKeyValuePaths ()Ljava/util/List; + public final fun getTypeClashTactic ()Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; + public final fun getUnifyNumbers ()Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/io/JsonKt { public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; diff --git a/dataframe-openapi-generator/api/dataframe-openapi-generator.api b/dataframe-openapi-generator/api/dataframe-openapi-generator.api index cc65f36f8e..76583b5641 100644 --- a/dataframe-openapi-generator/api/dataframe-openapi-generator.api +++ b/dataframe-openapi-generator/api/dataframe-openapi-generator.api @@ -19,6 +19,42 @@ public final class org/jetbrains/kotlinx/dataframe/io/OpenApi : org/jetbrains/ko public static synthetic fun readCodeForGeneration$default (Lorg/jetbrains/kotlinx/dataframe/io/OpenApi;Ljava/lang/String;Ljava/lang/String;ZZILjava/lang/Object;)Ljava/lang/String; } +public final class org/jetbrains/kotlinx/dataframe/io/OpenApi2 : org/jetbrains/kotlinx/dataframe/io/DataFrameReadSource { + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$Companion; + public fun ()V + public fun acceptsSource 
(Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Z + public fun getSupportedTypes ()Ljava/util/Set; + public fun getTestOrder ()I + public fun readDataFrameOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrameSchemaOrNull (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; + public fun readDataSchemaCodeOrNull-myXLQ2E (Ljava/lang/Object;Lorg/jetbrains/kotlinx/dataframe/io/DataSourceInfo;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions;)Ljava/lang/String; + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/io/OpenApi2$Companion { +} + +public final class org/jetbrains/kotlinx/dataframe/io/OpenApi2$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { + public fun ()V + public fun (Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;)V + public synthetic fun (Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Ljava/util/List; + public final fun component2 ()Lio/swagger/v3/parser/core/models/ParseOptions; + public final fun component3 ()Z + public final fun component4 ()Z + public final fun component5 ()Lorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility; + public final fun copy (Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;)Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$Options; + public static synthetic fun copy$default 
(Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$Options;Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$Options; + public fun equals (Ljava/lang/Object;)Z + public final fun getAuth ()Ljava/util/List; + public final fun getExtensionProperties ()Z + public final fun getGenerateHelperCompanionObject ()Z + public final fun getParseOptions ()Lio/swagger/v3/parser/core/models/ParseOptions; + public final fun getVisibility ()Lorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + public final class org/jetbrains/kotlinx/dataframe/io/ReadOpenapiKt { public static final fun readOpenApi (Ljava/lang/String;Ljava/lang/String;Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;)Ljava/lang/String; public static synthetic fun readOpenApi$default (Ljava/lang/String;Ljava/lang/String;Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;ILjava/lang/Object;)Ljava/lang/String; From aa2bd1b16299fa534f29789480f3bd19776f17ed Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 19 May 2026 21:28:48 +0200 Subject: [PATCH 14/20] tests for parsing json columns to check behavior still matches. Added parseToDataFrameReadSource parser option. 
--- core/api/core.api | 15 +- .../kotlinx/dataframe/api/ParserOptions.kt | 12 +- .../kotlinx/dataframe/impl/api/convert.kt | 80 +++---- .../kotlinx/dataframe/impl/api/parse.kt | 96 +++++--- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 45 ++-- .../kotlinx/dataframe/io/ParserTests.kt | 4 +- .../kotlinx/dataframe/impl/io/readDelim.kt | 10 +- .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 3 + .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 3 + .../kotlinx/dataframe/io/db/DuckDb.kt | 3 +- .../dataframe/io/ParseJsonColumnTests.kt | 215 ++++++++++++++++++ 11 files changed, 383 insertions(+), 103 deletions(-) create mode 100644 dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParseJsonColumnTests.kt diff --git a/core/api/core.api b/core/api/core.api index 6ff4138858..ffeec4d531 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -2475,6 +2475,7 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/GlobalParser public abstract fun getNulls ()Ljava/util/Set; public abstract fun getParseExperimentalInstant ()Z public abstract fun getParseExperimentalUuid ()Z + public abstract fun getParseToDataFrameReadSource ()Z public abstract fun getSkipTypes ()Ljava/util/Set; public abstract fun getUseFastDoubleParser ()Z public abstract fun resetToDefault ()V @@ -2482,6 +2483,7 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/GlobalParser public abstract fun setLocale (Ljava/util/Locale;)V public abstract fun setParseExperimentalInstant (Z)V public abstract fun setParseExperimentalUuid (Z)V + public abstract fun setParseToDataFrameReadSource (Z)V public abstract fun setUseFastDoubleParser (Z)V } @@ -3622,18 +3624,19 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions { public synthetic fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;ILkotlin/jvm/internal/DefaultConstructorMarker;)V public synthetic fun 
(Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)V public synthetic fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V - public fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;)V - public synthetic fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V - public fun (Ljava/util/Locale;Lorg/jetbrains/kotlinx/dataframe/api/DateTimeParserOptions;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;)V - public synthetic fun (Ljava/util/Locale;Lorg/jetbrains/kotlinx/dataframe/api/DateTimeParserOptions;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V - public final fun copy (Ljava/util/Locale;Lorg/jetbrains/kotlinx/dataframe/api/DateTimeParserOptions;Ljava/lang/Iterable;Ljava/lang/Iterable;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; - public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Lorg/jetbrains/kotlinx/dataframe/api/DateTimeParserOptions;Ljava/lang/Iterable;Ljava/lang/Iterable;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; + public fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;)V + public synthetic fun 
(Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun (Ljava/util/Locale;Lorg/jetbrains/kotlinx/dataframe/api/DateTimeParserOptions;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;)V + public synthetic fun (Ljava/util/Locale;Lorg/jetbrains/kotlinx/dataframe/api/DateTimeParserOptions;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun copy (Ljava/util/Locale;Lorg/jetbrains/kotlinx/dataframe/api/DateTimeParserOptions;Ljava/lang/Iterable;Ljava/lang/Iterable;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Lorg/jetbrains/kotlinx/dataframe/api/DateTimeParserOptions;Ljava/lang/Iterable;Ljava/lang/Iterable;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; public fun equals (Ljava/lang/Object;)Z public final fun getDateTime ()Lorg/jetbrains/kotlinx/dataframe/api/DateTimeParserOptions; public final fun getLocale ()Ljava/util/Locale; public final fun getNullStrings ()Ljava/util/Set; public final fun getParseExperimentalInstant ()Ljava/lang/Boolean; public final fun getParseExperimentalUuid ()Ljava/lang/Boolean; + public final fun getParseToDataFrameReadSource ()Ljava/lang/Boolean; public final fun getSkipTypes ()Ljava/util/Set; public final fun getUseFastDoubleParser ()Ljava/lang/Boolean; public fun hashCode ()I diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ParserOptions.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ParserOptions.kt index b77de07bab..abad2f2631 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ParserOptions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ParserOptions.kt @@ -291,6 +291,8 @@ public interface GlobalParserOptions { * @see [addJavaDateTimePattern] */ public var dateTimeLibrary: ParseDateTimeLibrary? + + public var parseToDataFrameReadSource: Boolean } /** @include [GlobalParserOptions] */ @@ -410,6 +412,7 @@ public class ParserOptions( public val useFastDoubleParser: Boolean? = null, public val parseExperimentalUuid: Boolean? = null, public val parseExperimentalInstant: Boolean? = null, + public val parseToDataFrameReadSource: Boolean? = null, ) { public fun copy( locale: Locale? = this.locale, @@ -419,6 +422,7 @@ public class ParserOptions( useFastDoubleParser: Boolean? = this.useFastDoubleParser, parseExperimentalUuid: Boolean? = this.parseExperimentalUuid, parseExperimentalInstant: Boolean? = this.parseExperimentalInstant, + parseToDataFrameReadSource: Boolean? 
= this.parseToDataFrameReadSource, ): ParserOptions = ParserOptions( locale = locale, @@ -428,6 +432,7 @@ public class ParserOptions( useFastDoubleParser = useFastDoubleParser, parseExperimentalUuid = parseExperimentalUuid, parseExperimentalInstant = parseExperimentalInstant, + parseToDataFrameReadSource = parseToDataFrameReadSource, ) override fun equals(other: Any?): Boolean { @@ -439,6 +444,7 @@ public class ParserOptions( if (useFastDoubleParser != other.useFastDoubleParser) return false if (parseExperimentalUuid != other.parseExperimentalUuid) return false if (parseExperimentalInstant != other.parseExperimentalInstant) return false + if (parseToDataFrameReadSource != other.parseToDataFrameReadSource) return false if (locale != other.locale) return false if (dateTime != other.dateTime) return false if (nullStrings != other.nullStrings) return false @@ -451,6 +457,7 @@ public class ParserOptions( var result = useFastDoubleParser?.hashCode() ?: 0 result = 31 * result + (parseExperimentalUuid?.hashCode() ?: 0) result = 31 * result + (parseExperimentalInstant?.hashCode() ?: 0) + result = 31 * result + (parseToDataFrameReadSource?.hashCode() ?: 0) result = 31 * result + (locale?.hashCode() ?: 0) result = 31 * result + (dateTime?.hashCode() ?: 0) result = 31 * result + (nullStrings?.hashCode() ?: 0) @@ -459,7 +466,7 @@ public class ParserOptions( } override fun toString(): String = - "ParserOptions(locale=$locale, dateTimeParserOptions=$dateTime, nullStrings=$nullStrings, skipTypes=$skipTypes, useFastDoubleParser=$useFastDoubleParser, parseExperimentalUuid=$parseExperimentalUuid, parseExperimentalInstant=$parseExperimentalInstant)" + "ParserOptions(locale=$locale, dateTimeParserOptions=$dateTime, nullStrings=$nullStrings, skipTypes=$skipTypes, useFastDoubleParser=$useFastDoubleParser, parseExperimentalUuid=$parseExperimentalUuid, parseExperimentalInstant=$parseExperimentalInstant,
parseToDataFrameReadSource=$parseToDataFrameReadSource)" // region deprecated constructors @@ -518,6 +526,7 @@ public class ParserOptions( useFastDoubleParser: Boolean? = null, parseExperimentalUuid: Boolean? = null, parseExperimentalInstant: Boolean? = null, + parseToDataFrameReadSource: Boolean? = null, ) : this( locale = locale, dateTime = 0.run { @@ -535,6 +544,7 @@ public class ParserOptions( useFastDoubleParser = useFastDoubleParser, parseExperimentalUuid = parseExperimentalUuid, parseExperimentalInstant = parseExperimentalInstant, + parseToDataFrameReadSource = parseToDataFrameReadSource, ) // endregion } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt index 74a7e6564a..c734a6b101 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt @@ -196,10 +196,6 @@ internal inline fun convert(crossinline converter: (T) -> Any?): TypeConvert private enum class DummyEnum -private val dataFrameReadSourceSupportedClasses by lazy { - dataFrameReadSourceByType.keys.map { it.jvmErasure }.toSet() -} - @Suppress("UNCHECKED_CAST") internal fun createConverter(from: KType, to: KType, options: ParserOptions? = null): TypeConverter? { if (from.isMarkedNullable) { @@ -209,50 +205,46 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n val fromClass = from.jvmErasure val toClass = to.jvmErasure - // readSource-backed conversions handle target types with type arguments (e.g. `DataFrame<*>`, - // `DataRow<*>`), so they must run before the generic-arguments early-exit below. 
- if (dataFrameReadSourceByType.any { from.isSubtypeOf(it.key) }) { - val readSources = dataFrameReadSourceByType.entries - .first { from.isSubtypeOf(it.key) }.value - - when (toClass) { - DataFrame::class -> - return convert { source -> - DataFrame.readSource( - source = source, - type = from, - options = null, - formats = readSources, - ) - } - - DataRow::class -> - return convert { source -> - DataRow.readSource( - source = source, - type = from, - options = null, - formats = readSources, - ) - } - - DataFrameSchema::class -> - return convert { source -> - DataFrameSchema.readSource( - source = source, - type = from, - options = null, - formats = readSources, - ) - } - } + // early exit when we encounter types with generics (except DataFrame and DataRow), which we don't support + if (from.arguments.isNotEmpty() || + (to.arguments.isNotEmpty() && toClass !in setOf(DataFrame::class, DataRow::class)) + ) { + return null } - return when { - from.arguments.isNotEmpty() || to.arguments.isNotEmpty() -> null + val fromTypeInDfReadSources = + dataFrameReadSourceByType.keys.any { from.isSubtypeOf(it) } || from == typeOf() + return when { fromClass == toClass -> TypeConverterIdentity + fromTypeInDfReadSources && toClass == DataFrame::class -> + convert { source -> + DataFrame.readSource( + source = source, + type = from, + options = null, + ) + } + + fromTypeInDfReadSources && toClass == DataRow::class -> + convert { source -> + DataRow.readSource( + source = source, + type = from, + options = null, + ) + } + + fromTypeInDfReadSources && toClass == DataFrameSchema::class -> + convert { source -> + DataFrameSchema.readSource( + source = source, + type = from, + options = null, + ) + } + // kotlin.time.Duration is a value class, // so it must be handled before the generic toClass.isValue / fromClass.isValue branches. toClass == Duration::class -> when (fromClass) { @@ -277,7 +269,7 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? 
= n val underlyingType = constructor.parameters.single().type val converter = getConverter(from, underlyingType) ?: throw TypeConverterNotFoundException(from, underlyingType, null) - return convert { + convert { val converted = converter(it) if (converted == null && !underlyingType.isMarkedNullable) { throw TypeConversionException(it, from, underlyingType, null) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 2d654e6024..77cbdcce35 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -39,6 +39,7 @@ import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.parser +import org.jetbrains.kotlinx.dataframe.api.singleOrNull import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.size import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException @@ -50,7 +51,10 @@ import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse import org.jetbrains.kotlinx.dataframe.impl.lazyMapOf +import org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource import org.jetbrains.kotlinx.dataframe.io.isUrl +import org.jetbrains.kotlinx.dataframe.io.newSupportedFormats +import org.jetbrains.kotlinx.dataframe.io.readSourceImpl import org.jetbrains.kotlinx.dataframe.values import java.math.BigDecimal import java.math.BigInteger @@ -334,6 +338,8 @@ internal object Parsers : GlobalParserOptions { override var dateTimeLibrary: ParseDateTimeLibrary? 
= null + override var parseToDataFrameReadSource by Delegates.notNull() + override fun resetToDefault() { customGlobalJavaFormatters.values.forEach { it.clear() } nullStrings.clear() @@ -343,6 +349,8 @@ internal object Parsers : GlobalParserOptions { useFastDoubleParser = true parseExperimentalUuid = false parseExperimentalInstant = true + // disabled by default, because it can be very heavy + parseToDataFrameReadSource = false _locale = null dateTimeLibrary = null nullStrings.addAll(listOf("null", "NULL", "NA", "N/A")) @@ -941,40 +949,76 @@ internal object Parsers : GlobalParserOptions { stringParser { it.toBigIntegerOrNull() }, // BigDecimal stringParser { it.toBigDecimalOrNull() }, - // JSON array as DataFrame<*> - stringParser(catch = true) { - val trimmed = it.trim() - if (trimmed.startsWith("[") && trimmed.endsWith("]")) { - if (readJsonStrAnyFrame == null) { - logger.warn { - "parse() encountered a string that looks like a JSON array, but the dataframe-json dependency was not detected. Skipping for now." - } - null - } else { - readJsonStrAnyFrame!!(trimmed) + // Char + stringParser { it.singleOrNull() }, + stringParserWithOptions { options, isConverter -> + if (options?.parseToDataFrameReadSource ?: this.parseToDataFrameReadSource) { + parseBy { + readSourceImpl( + source = it, + sourceType = typeOf(), + options = null, + formats = newSupportedFormats, + resultKind = "DataRow", + doStringToUrlConversion = isConverter, + readOrNull = { source, sourceInfo, options -> + readDataFrameOrNull(source, sourceInfo, options)?.singleOrNull() + }, + ).getOrNull() } } else { - null + SKIP_PARSER } }, - // JSON object as DataRow<*> - stringParser(catch = true) { - val trimmed = it.trim() - if (trimmed.startsWith("{") && trimmed.endsWith("}")) { - if (readJsonStrAnyRow == null) { - logger.warn { - "parse() encountered a string that looks like a JSON object, but the dataframe-json dependency was not detected. Skipping for now." 
- } - null - } else { - readJsonStrAnyRow!!(trimmed) + stringParserWithOptions { options, isConverter -> + if (options?.parseToDataFrameReadSource ?: this.parseToDataFrameReadSource) { + parseBy { + readSourceImpl( + source = it, + sourceType = typeOf(), + options = null, + formats = newSupportedFormats, + resultKind = "DataFrame", + doStringToUrlConversion = isConverter, + readOrNull = DataFrameReadSource::readDataFrameOrNull, + ).getOrNull() } } else { - null + SKIP_PARSER } }, - // Char - stringParser { it.singleOrNull() }, +// // JSON array as DataFrame<*> +// stringParser(catch = true) { +// val trimmed = it.trim() +// if (trimmed.startsWith("[") && trimmed.endsWith("]")) { +// if (readJsonStrAnyFrame == null) { +// logger.warn { +// "parse() encountered a string that looks like a JSON array, but the dataframe-json dependency was not detected. Skipping for now." +// } +// null +// } else { +// readJsonStrAnyFrame!!(trimmed) +// } +// } else { +// null +// } +// }, +// // JSON object as DataRow<*> +// stringParser(catch = true) { +// val trimmed = it.trim() +// if (trimmed.startsWith("{") && trimmed.endsWith("}")) { +// if (readJsonStrAnyRow == null) { +// logger.warn { +// "parse() encountered a string that looks like a JSON object, but the dataframe-json dependency was not detected. Skipping for now." +// } +// null +// } else { +// readJsonStrAnyRow!!(trimmed) +// } +// } else { +// null +// } +// }, // No parser found, return as String // must be last in the list of parsers to return original unparsed string stringParser { it }, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 858cee018a..7f92575aa0 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -88,13 +88,7 @@ public data class DataSourceInfo( public val kType: KType, public val extension: String? 
= null, public val mimeType: String? = null, -) { - init { - if (mimeType != null) { - println() - } - } -} +) /** * NOTE: Needs to have fully qualified name in @@ -116,13 +110,6 @@ internal val dataFrameReadSourceByType: Map> by getOrPut(type) { mutableListOf() }.let { if (format !in it) it += format } - - // special String -> URL case - if (type == typeOf()) { - getOrPut(typeOf()) { mutableListOf() }.let { - if (format !in it) it += format - } - } } } values.forEach { @@ -145,13 +132,14 @@ internal fun readSourceImpl( options: DataFrameReadOptions?, formats: List, resultKind: String, + doStringToUrlConversion: Boolean, readOrNull: DataFrameReadSource.( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, ) -> T?, -): T { - if (source is String) { +): Result { + if (doStringToUrlConversion && source is String) { val url = asUrlOrNull(source) if (url != null) { return readSourceImpl( @@ -160,6 +148,7 @@ internal fun readSourceImpl( options = options, formats = formats, resultKind = resultKind, + doStringToUrlConversion = true, readOrNull = readOrNull, ) } @@ -189,14 +178,20 @@ internal fun readSourceImpl( if (!it.acceptsSource(sourceInfo, options)) return@forEach try { val result = it.readOrNull(getSource(), sourceInfo, options) - if (result != null) return result + if (result != null) return Result.success(result) + + val name = it::class.simpleName!! + tries[name] = Exception("$name returned null.") } catch (e: FileNotFoundException) { - throw e + // fail early. File not found means the reference is broken. + return Result.failure(exception = e) } catch (e: Exception) { tries[it::class.simpleName!!] 
= e } } - throw IllegalArgumentException("Unknown $resultKind source $source, $sourceInfo; Tried $tries") + return Result.failure( + exception = IllegalArgumentException("Unknown $resultKind source $source, $sourceInfo; Tried $tries"), + ) } /** @@ -223,8 +218,9 @@ public fun DataFrame.Companion.readSource( options = options, formats = formats, resultKind = "DataFrame", + doStringToUrlConversion = true, readOrNull = DataFrameReadSource::readDataFrameOrNull, - ) + ).getOrThrow() public inline fun DataRow.Companion.readSource( source: R, @@ -244,10 +240,11 @@ public fun DataRow.Companion.readSource( options = options, formats = formats, resultKind = "DataRow", + doStringToUrlConversion = true, readOrNull = { source, sourceInfo, options -> readDataFrameOrNull(source, sourceInfo, options)?.single() }, - ) + ).getOrThrow() public inline fun DataFrame.Companion.readSource( source: R, @@ -279,8 +276,9 @@ public fun DataFrameSchema.Companion.readSource( options = options, formats = formats, resultKind = "DataFrameSchema", + doStringToUrlConversion = true, readOrNull = DataFrameReadSource::readDataFrameSchemaOrNull, - ) + ).getOrThrow() public inline fun DataFrameSchema.Companion.readSource( source: R, @@ -318,10 +316,11 @@ public fun CodeString.Companion.readSource( options = options, formats = formats, resultKind = "CodeString", + doStringToUrlConversion = true, readOrNull = { src, info, opts -> readDataSchemaCodeOrNull(src, info, name, opts) }, - ) + ).getOrThrow() public inline fun CodeString.Companion.readSource( source: R, diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt index 5900956fcf..3ba90f39d1 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt @@ -464,7 +464,9 @@ class ParserTests { @Test fun `Mixing null and json`() { val col by 
columnOf("[\"str\"]", "[]", "null") - val parsed = col.parse() + val parsed = col.parse( + ParserOptions(parseToDataFrameReadSource = true), + ) parsed.type() shouldBe typeOf() parsed.kind() shouldBe ColumnKind.Frame require(parsed.isFrameColumn()) diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt index 34bf7aab0d..cd9230b610 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt @@ -231,8 +231,16 @@ private fun CsvReader.ResultColumn.toDataColumn( val givenSkipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes // no need to check for types that Deephaven already parses, skip those too val adjustedSkipTypes = givenSkipTypes + typesDeephavenAlreadyParses + + val parseDfReadSource = parserOptions?.parseToDataFrameReadSource + ?: DataFrame.parser.parseToDataFrameReadSource.takeIf { it } // only take if adjusted to 'true' + ?: true + val adjustedParserOptions = (parserOptions ?: ParserOptions()) - .copy(skipTypes = adjustedSkipTypes) + .copy( + skipTypes = adjustedSkipTypes, + parseToDataFrameReadSource = parseDfReadSource, + ) column.tryParse(adjustedParserOptions) } diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 4e77e63d50..57cdb342da 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -134,6 +134,9 @@ public class Csv : DataFrameReadSource { kType.isSubTypeOf() -> (source as? 
String)?.let { text -> + // early fail + if (opts.delimiter !in text) return null + DataFrame.readCsvStr( text = text, delimiter = opts.delimiter, diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index 96e026ab69..49f3723a5e 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -134,6 +134,9 @@ public class Tsv : DataFrameReadSource { kType.isSubTypeOf() -> (source as? String)?.let { text -> + // early fail + if (opts.delimiter !in text) return null + DataFrame.readTsvStr( text = text, delimiter = opts.delimiter, diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DuckDb.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DuckDb.kt index e454dec406..80b54229b3 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DuckDb.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DuckDb.kt @@ -43,6 +43,7 @@ import org.duckdb.JsonNode import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.Infer +import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asDataColumn import org.jetbrains.kotlinx.dataframe.api.inferType @@ -158,7 +159,7 @@ public object DuckDb : AdvancedDbType("duckdb") { .withColumnBuilder(targetSchema = null) { name, values, inferNullability -> values .toColumn(name, if (inferNullability) Infer.Nulls else Infer.None) - .tryParse() + .tryParse(ParserOptions(parseToDataFrameReadSource = true)) .inferType() } diff --git a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParseJsonColumnTests.kt 
b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParseJsonColumnTests.kt new file mode 100644 index 0000000000..1a6f3430bc --- /dev/null +++ b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParseJsonColumnTests.kt @@ -0,0 +1,215 @@ +package org.jetbrains.kotlinx.dataframe.io + +import io.kotest.matchers.shouldBe +import org.intellij.lang.annotations.Language +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.asColumnGroup +import org.jetbrains.kotlinx.dataframe.api.asFrameColumn +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.getColumnGroup +import org.jetbrains.kotlinx.dataframe.api.getFrameColumn +import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.api.isFrameColumn +import org.jetbrains.kotlinx.dataframe.api.parse +import org.jetbrains.kotlinx.dataframe.api.parser +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.junit.jupiter.api.AfterAll +import org.junit.jupiter.api.BeforeAll +import kotlin.reflect.typeOf +import kotlin.test.Test + +/** + * Tests that strings containing JSON in a [String] column can be parsed via [parse], + * where JSON arrays become [DataFrame]s (forming a [FrameColumn]) + * and JSON objects become [DataRow]s (forming a [ColumnGroup]). 
+ */ +class ParseJsonColumnTests { + + companion object { + @[BeforeAll JvmStatic] + fun `setup ParserOptions`() { + DataFrame.parser.parseToDataFrameReadSource = true + } + + @[AfterAll JvmStatic] + fun `reset ParserOptions`() { + DataFrame.parser.resetToDefault() + } + } + + @Test + fun `parse column of json arrays into FrameColumn`() { + @Language("json") + val a = """[1, 2, 3]""" + + @Language("json") + val b = """[4, 5, 6]""" + + val col = columnOf(a, b) + val parsed = col.parse() + + parsed.isFrameColumn() shouldBe true + val frameCol = parsed.asFrameColumn() + frameCol.size() shouldBe 2 + frameCol[0]["value"].values().toList() shouldBe listOf(1, 2, 3) + frameCol[1]["value"].values().toList() shouldBe listOf(4, 5, 6) + } + + @Test + fun `parse column of json objects into ColumnGroup`() { + @Language("json") + val a = """{"x": 1, "y": "a"}""" + + @Language("json") + val b = """{"x": 2, "y": "b"}""" + + val col = columnOf(a, b) + val parsed = col.parse() + + parsed.isColumnGroup() shouldBe true + val group = parsed.asColumnGroup() + group.columnsCount() shouldBe 2 + group["x"].type() shouldBe typeOf() + group["y"].type() shouldBe typeOf() + group["x"].values().toList() shouldBe listOf(1, 2) + group["y"].values().toList() shouldBe listOf("a", "b") + } + + @Test + fun `parse dataframe column of json arrays into FrameColumn`() { + @Language("json") + val a = """[10, 20]""" + + @Language("json") + val b = """[30, 40, 50]""" + + val df = dataFrameOf("data")(a, b) + val parsed = df.parse("data") + + parsed.rowsCount() shouldBe 2 + parsed["data"].isFrameColumn() shouldBe true + val frameCol = parsed.getFrameColumn("data") + frameCol[0]["value"].values().toList() shouldBe listOf(10, 20) + frameCol[1]["value"].values().toList() shouldBe listOf(30, 40, 50) + } + + @Test + fun `parse dataframe column of json objects into ColumnGroup`() { + @Language("json") + val a = """{"name": "Alice", "age": 30}""" + + @Language("json") + val b = """{"name": "Bob", "age": 25}""" + + 
val df = dataFrameOf("person")(a, b) + val parsed = df.parse("person") + + parsed.rowsCount() shouldBe 2 + parsed["person"].isColumnGroup() shouldBe true + val group = parsed.getColumnGroup("person") + group.columnsCount() shouldBe 2 + group["name"].values().toList() shouldBe listOf("Alice", "Bob") + group["age"].values().toList() shouldBe listOf(30, 25) + group["name"].type() shouldBe typeOf() + group["age"].type() shouldBe typeOf() + } + + @Test + fun `parse column of json arrays of objects`() { + @Language("json") + val a = """[{"k": 1}, {"k": 2}]""" + + @Language("json") + val b = """[{"k": 3}, {"k": 4}, {"k": 5}]""" + + val parsed = columnOf(a, b).parse() + + parsed.isFrameColumn() shouldBe true + val frameCol = parsed.asFrameColumn() + frameCol.size() shouldBe 2 + frameCol[0]["k"].values().toList() shouldBe listOf(1, 2) + frameCol[1]["k"].values().toList() shouldBe listOf(3, 4, 5) + } + + @Test + fun `parse column of nested json objects`() { + @Language("json") + val a = """{"outer": {"inner": 1}}""" + + @Language("json") + val b = """{"outer": {"inner": 2}}""" + + val parsed = columnOf(a, b).parse() + + parsed.isColumnGroup() shouldBe true + val outer = parsed.asColumnGroup().getColumnGroup("outer") + outer["inner"].type() shouldBe typeOf() + outer["inner"].values().toList() shouldBe listOf(1, 2) + } + + @Test + fun `parse column of json objects containing arrays`() { + @Language("json") + val a = """{"name": "list1", "values": [1, 2, 3]}""" + + @Language("json") + val b = """{"name": "list2", "values": [4, 5]}""" + + val parsed = columnOf(a, b).parse() + + parsed.isColumnGroup() shouldBe true + val group = parsed.asColumnGroup() + group["name"].values().toList() shouldBe listOf("list1", "list2") + group["values"].type() shouldBe typeOf>() + group["values"].values().toList() shouldBe listOf(listOf(1, 2, 3), listOf(4, 5)) + } + + @Test + fun `parse column of json arrays with whitespace`() { + val col = columnOf(" [1, 2, 3] ", "\n[4, 5]\t") + val parsed = 
col.parse() + + parsed.isFrameColumn() shouldBe true + val frameCol = parsed.asFrameColumn() + frameCol.size() shouldBe 2 + frameCol[0]["value"].values().toList() shouldBe listOf(1, 2, 3) + frameCol[1]["value"].values().toList() shouldBe listOf(4, 5) + } + + @Test + fun `parse dataframe with multiple json columns`() { + @Language("json") + val obj1 = """{"a": 1}""" + + @Language("json") + val obj2 = """{"a": 2}""" + + @Language("json") + val arr1 = """[1, 2]""" + + @Language("json") + val arr2 = """[3, 4]""" + + val df = dataFrameOf("obj", "arr")( + obj1, + arr1, + obj2, + arr2, + ) + val parsed = df.parse() + + parsed.rowsCount() shouldBe 2 + parsed["obj"].isColumnGroup() shouldBe true + parsed["arr"].isFrameColumn() shouldBe true + + val objGroup = parsed.getColumnGroup("obj") + objGroup["a"].values().toList() shouldBe listOf(1, 2) + + val arrFrame = parsed.getFrameColumn("arr") + arrFrame[0]["value"].values().toList() shouldBe listOf(1, 2) + arrFrame[1]["value"].values().toList() shouldBe listOf(3, 4) + } +} From 32fe3477c5d82fb44f8d93162ed1523e2a4e297c Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 26 May 2026 21:15:29 +0200 Subject: [PATCH 15/20] rewrote -orNull logic to Results too --- .../jetbrains/kotlinx/dataframe/impl/Utils.kt | 16 ++ .../kotlinx/dataframe/impl/api/parse.kt | 11 +- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 68 +++---- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 4 +- .../kotlinx/dataframe/io/arrowReading.kt | 185 ++++++++--------- .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 90 +++++---- .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 89 +++++---- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 75 ++++--- .../jetbrains/kotlinx/dataframe/io/Jdbc.kt | 189 ++++++++++-------- .../jetbrains/kotlinx/dataframe/io/json.kt | 86 ++++---- .../jetbrains/kotlinx/dataframe/io/OpenApi.kt | 129 ++++++------ 11 files changed, 500 insertions(+), 442 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt index 070da0b884..467c6c7635 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt @@ -511,3 +511,19 @@ internal val KCallable<*>.columnName: String is KProperty<*> -> columnName else -> findAnnotation()?.name ?: getterName } + +/** + * Similar to [Result.map], but allows a new [Result] to be returned if [this] was successful. + * This result will be unpacked (so you won't get `Result<Result<R>>`). + */ +@PublishedApi +internal inline fun Result.flatMap(function: (T) -> Result): Result = + fold( + onSuccess = { function(it) }, + onFailure = { Result.failure(it) }, + ) + +internal fun Result>.flatten(): Result = this.flatMap { it } + +internal fun T?.toResult(exception: Throwable = NullPointerException()) = + this?.let { Result.success(it) } ?: Result.failure(exception) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 77cbdcce35..641811cbf3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -39,6 +39,8 @@ import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.parser +import org.jetbrains.kotlinx.dataframe.api.rows +import org.jetbrains.kotlinx.dataframe.api.single import org.jetbrains.kotlinx.dataframe.api.singleOrNull import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.size @@ -48,9 +50,11 @@ import org.jetbrains.kotlinx.dataframe.impl.LazyMap import org.jetbrains.kotlinx.dataframe.impl.api.Parsers.resetToDefault import org.jetbrains.kotlinx.dataframe.impl.catchSilent import
org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType +import org.jetbrains.kotlinx.dataframe.impl.flatMap import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse import org.jetbrains.kotlinx.dataframe.impl.lazyMapOf +import org.jetbrains.kotlinx.dataframe.impl.toResult import org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource import org.jetbrains.kotlinx.dataframe.io.isUrl import org.jetbrains.kotlinx.dataframe.io.newSupportedFormats @@ -961,8 +965,9 @@ internal object Parsers : GlobalParserOptions { formats = newSupportedFormats, resultKind = "DataRow", doStringToUrlConversion = isConverter, - readOrNull = { source, sourceInfo, options -> - readDataFrameOrNull(source, sourceInfo, options)?.singleOrNull() + read = { source, sourceInfo, options -> + readDataFrame(source, sourceInfo, options) + .flatMap { it.singleOrNull().toResult() } }, ).getOrNull() } @@ -980,7 +985,7 @@ internal object Parsers : GlobalParserOptions { formats = newSupportedFormats, resultKind = "DataFrame", doStringToUrlConversion = isConverter, - readOrNull = DataFrameReadSource::readDataFrameOrNull, + read = DataFrameReadSource::readDataFrame, ).getOrNull() } } else { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 7f92575aa0..fbbc3ac97f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -41,13 +41,13 @@ public interface DataFrameReadSource { * is invoked, so only include `String` here when raw text content is a legitimate input (e.g., JSON/CSV * text). For binary formats, leave `String` out. */ - public val supportedTypes: Set + public val supportedReadingTypes: Set - public fun readDataFrameOrNull( + public fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions? 
= null, - ): DataFrame<*>? + ): Result> /** * Read just the [DataFrameSchema] for [source]. @@ -56,26 +56,20 @@ public interface DataFrameReadSource { * source format can introspect types without materializing rows (e.g., JDBC metadata queries, Parquet/Arrow * file footers, OpenAPI specs). */ - public fun readDataFrameSchemaOrNull( + public fun readDataFrameSchema( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions? = null, - ): DataFrameSchema? = readDataFrameOrNull(source, sourceInfo, options)?.schema() + ): Result = readDataFrame(source, sourceInfo, options).map { it.schema() } - public fun readDataSchemaCodeOrNull( + public fun readDataSchemaCode( source: Any, sourceInfo: DataSourceInfo, name: String, options: DataFrameReadOptions? = null, - ): CodeString? = - readDataFrameSchemaOrNull(source, sourceInfo, options) - ?.generateInterfaces(name) + ): Result = readDataFrameSchema(source, sourceInfo, options).map { it.generateInterfaces(name) } public fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean - - // `DataFrame.Companion.read` methods uses this to sort list of all supported formats in ascending order (-1, 2, 10) - // sorted list is used to test if any format can read given input - public val testOrder: Int } /** @@ -106,7 +100,7 @@ internal val newSupportedFormats: List by lazy { internal val dataFrameReadSourceByType: Map> by lazy { buildMap> { newSupportedFormats.forEach { format -> - format.supportedTypes.forEach { type -> + format.supportedReadingTypes.forEach { type -> getOrPut(type) { mutableListOf() }.let { if (format !in it) it += format } @@ -121,9 +115,9 @@ internal val dataFrameReadSourceByType: Map> by /** * Shared dispatch loop for [readDataFrameImpl] and [readDataFrameSchemaImpl]: handles String→URL * normalization, InputStream buffering, sorted iteration, and error aggregation. 
The per-format read - * operation is supplied as [readOrNull]; [resultKind] is used only in the "unknown source" error message. + * operation is supplied as [read]; [resultKind] is used only in the "unknown source" error message. * - * @param [readOrNull] [DataFrameReadSource.readDataFrameOrNull] or [DataFrameReadSource.readDataFrameSchemaOrNull] + * @param [read] [DataFrameReadSource.readDataFrame] or [DataFrameReadSource.readDataFrameSchema] * Potentially, this could also return another type, like a GeoDataFrame. */ internal fun readSourceImpl( @@ -133,11 +127,11 @@ internal fun readSourceImpl( formats: List, resultKind: String, doStringToUrlConversion: Boolean, - readOrNull: DataFrameReadSource.( + read: DataFrameReadSource.( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ) -> T?, + ) -> Result, ): Result { if (doStringToUrlConversion && source is String) { val url = asUrlOrNull(source) @@ -149,7 +143,7 @@ internal fun readSourceImpl( formats = formats, resultKind = resultKind, doStringToUrlConversion = true, - readOrNull = readOrNull, + read = read, ) } } @@ -176,18 +170,14 @@ internal fun readSourceImpl( val tries = mutableMapOf() formats.sortedBy { it.testOrder }.forEach { if (!it.acceptsSource(sourceInfo, options)) return@forEach - try { - val result = it.readOrNull(getSource(), sourceInfo, options) - if (result != null) return Result.success(result) - - val name = it::class.simpleName!! - tries[name] = Exception("$name returned null.") - } catch (e: FileNotFoundException) { - // fail early. File not found means the reference is broken. - return Result.failure(exception = e) - } catch (e: Exception) { - tries[it::class.simpleName!!] = e - } + val result = it.read(getSource(), sourceInfo, options) + result + .onSuccess { return Result.success(it) } + .onFailure { e -> + // fail early. File not found means the reference is broken. + if (e is FileNotFoundException) return Result.failure(exception = e) + tries[it::class.simpleName!!] 
= e + } } return Result.failure( exception = IllegalArgumentException("Unknown $resultKind source $source, $sourceInfo; Tried $tries"), @@ -219,7 +209,7 @@ public fun DataFrame.Companion.readSource( formats = formats, resultKind = "DataFrame", doStringToUrlConversion = true, - readOrNull = DataFrameReadSource::readDataFrameOrNull, + read = DataFrameReadSource::readDataFrame, ).getOrThrow() public inline fun DataRow.Companion.readSource( @@ -241,8 +231,8 @@ public fun DataRow.Companion.readSource( formats = formats, resultKind = "DataRow", doStringToUrlConversion = true, - readOrNull = { source, sourceInfo, options -> - readDataFrameOrNull(source, sourceInfo, options)?.single() + read = { source, sourceInfo, options -> + readDataFrame(source, sourceInfo, options).mapCatching { it.single() } }, ).getOrThrow() @@ -277,7 +267,7 @@ public fun DataFrameSchema.Companion.readSource( formats = formats, resultKind = "DataFrameSchema", doStringToUrlConversion = true, - readOrNull = DataFrameReadSource::readDataFrameSchemaOrNull, + read = DataFrameReadSource::readDataFrameSchema, ).getOrThrow() public inline fun DataFrameSchema.Companion.readSource( @@ -298,8 +288,8 @@ public inline fun DataFrameSchema.Companion.readSource( * declarations (plus enums/typealiases for formats like OpenAPI). The [name] is the marker name used for * the top-level generated interface. * - * The default implementation in [DataFrameReadSource.readDataSchemaCodeOrNull] runs - * [DataFrameSchema.generateInterfaces] on the format's [DataFrameReadSource.readDataFrameSchemaOrNull] + * The default implementation in [DataFrameReadSource.readDataSchemaCode] runs + * [DataFrameSchema.generateInterfaces] on the format's [DataFrameReadSource.readDataFrameSchema] * result; formats that produce richer code (OpenAPI markers, enums, typealiases) override the method * directly. 
*/ @@ -317,8 +307,8 @@ public fun CodeString.Companion.readSource( formats = formats, resultKind = "CodeString", doStringToUrlConversion = true, - readOrNull = { src, info, opts -> - readDataSchemaCodeOrNull(src, info, name, opts) + read = { src, info, opts -> + readDataSchemaCode(src, info, name, opts) }, ).getOrThrow() diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index d4b9503a95..84bee18a69 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -478,8 +478,8 @@ class Guess2 { @Test fun `OpenAPI does not steal plain JSON DataFrame reads`() { // A regular JSON file (not an OpenAPI spec) still goes to Json, even though OpenApi2 runs first. - // OpenApi2.readDataSchemaCodeOrNull returns null for non-OpenAPI content, but more importantly - // OpenApi2.readDataFrameOrNull is the interface default (null), so DataFrame reads fall through. + // OpenApi2.readDataSchemaCode returns a failed Result for non-OpenAPI content, but more importantly + // OpenApi2.readDataFrame returns a failed Result, so DataFrame reads fall through. 
val expected = DataFrame.readJson("../data/participants.json") DataFrame.readSource(File("../data/participants.json")) shouldBe expected } diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt index bc4f1b78df..94df26773e 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt @@ -53,7 +53,7 @@ public class ArrowFeatherNEW : DataFrameReadSource { public data class Options(val nullability: NullabilityOptions = NullabilityOptions.Infer) : DataFrameReadOptions - override val supportedTypes: Set = + override val supportedReadingTypes: Set = setOf( typeOf(), typeOf(), @@ -71,47 +71,50 @@ public class ArrowFeatherNEW : DataFrameReadSource { override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } - override fun readDataFrameOrNull( + override fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrame<*>? { - val opts = (options ?: Options()) as Options - val kType = sourceInfo.kType - - // ArrowReader is exclusive; check before more general types. - if (kType.isSubTypeOf()) { - return (source as? ArrowReader)?.let { DataFrame.readArrow(it, opts.nullability) } - } - - val url: URL? = when { - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? 
File)?.toPath()?.toUri()?.toURL() - else -> null - } - if (url != null) { - return DataFrame.readArrowFeather(url, opts.nullability) - } - - return when { - kType.isSubTypeOf() -> - (source as? SeekableByteChannel)?.let { - DataFrame.readArrowFeather(it, nullability = opts.nullability) + ): Result> = + runCatching { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + // ArrowReader is exclusive; check before more general types. + if (kType.isSubTypeOf()) { + return@runCatching DataFrame.readArrow(source as ArrowReader, opts.nullability) + } + + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return@runCatching DataFrame.readArrowFeather(url, opts.nullability) + } + + return@runCatching when { + kType.isSubTypeOf() -> + DataFrame.readArrowFeather(source as SeekableByteChannel, nullability = opts.nullability) + + kType.isSubTypeOf() -> + DataFrame.readArrowFeather(source as ByteArray, opts.nullability) + + kType.isSubTypeOf() -> + DataFrame.readArrowFeather(source as InputStream, opts.nullability) + + else -> { + // return the exception without throwing it; cheaper + @Suppress("RedundantReturnKeyword") + return Result.failure(IllegalStateException("Cannot read source of type $kType as Arrow Feather")) } - - kType.isSubTypeOf() -> - (source as? ByteArray)?.let { DataFrame.readArrowFeather(it, opts.nullability) } - - kType.isSubTypeOf() -> - (source as? 
InputStream)?.let { DataFrame.readArrowFeather(it, opts.nullability) } - - else -> null + } } - } override val testOrder: Int = 60_000 @@ -138,7 +141,7 @@ public class ArrowIPC : DataFrameReadSource { val nullability: NullabilityOptions = NullabilityOptions.Infer, ) : DataFrameReadOptions - override val supportedTypes: Set = + override val supportedReadingTypes: Set = setOf( typeOf(), typeOf(), @@ -156,46 +159,49 @@ public class ArrowIPC : DataFrameReadSource { override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } - override fun readDataFrameOrNull( + override fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrame<*>? { - val opts = (options ?: Options()) as Options - val kType = sourceInfo.kType - - if (kType.isSubTypeOf()) { - return (source as? ArrowReader)?.let { DataFrame.readArrow(it, opts.nullability) } - } - - val url: URL? = when { - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() - else -> null - } - if (url != null) { - return DataFrame.readArrowIPC(url, opts.nullability) - } - - return when { - kType.isSubTypeOf() -> - (source as? ReadableByteChannel)?.let { - DataFrame.readArrowIPC(it, allocator = opts.allocator, nullability = opts.nullability) + ): Result> = + runCatching { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + if (kType.isSubTypeOf()) { + return@runCatching DataFrame.readArrow(source as ArrowReader, opts.nullability) + } + + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? 
Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return@runCatching DataFrame.readArrowIPC(url, opts.nullability) + } + + return@runCatching when { + kType.isSubTypeOf() -> + DataFrame.readArrowIPC(source as ReadableByteChannel, opts.allocator, opts.nullability) + + kType.isSubTypeOf() -> + DataFrame.readArrowIPC(source as ByteArray, opts.nullability) + + kType.isSubTypeOf() -> + DataFrame.readArrowIPC(source as InputStream, opts.nullability) + + else -> { + // return the exception without throwing it; cheaper + @Suppress("RedundantReturnKeyword") + return Result.failure(IllegalStateException("Cannot read source of type $kType as Arrow IPC")) } - - kType.isSubTypeOf() -> - (source as? ByteArray)?.let { DataFrame.readArrowIPC(it, opts.nullability) } - - kType.isSubTypeOf() -> - (source as? InputStream)?.let { DataFrame.readArrowIPC(it, opts.nullability) } - - else -> null + } } - } // Runs after ArrowFeatherNEW so that `.feather` files get the random-access reader first. 
// Both accept `.arrow`; if Feather reading throws on an IPC streaming file the framework falls @@ -221,7 +227,7 @@ public class Parquet : DataFrameReadSource { val batchSize: Long = ARROW_PARQUET_DEFAULT_BATCH_SIZE, ) : DataFrameReadOptions - override val supportedTypes: Set = + override val supportedReadingTypes: Set = setOf(typeOf(), typeOf(), typeOf()) public companion object { @@ -236,47 +242,46 @@ public class Parquet : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } - override fun readDataFrameOrNull( + override fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrame<*>? { - val opts = (options ?: Options()) as Options - val kType = sourceInfo.kType - return when { - kType.isSubTypeOf() -> - (source as? URL)?.let { + ): Result> = + runCatching { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + return@runCatching when { + kType.isSubTypeOf() -> DataFrame.readParquet( - it, + source as URL, nullability = opts.nullability, batchSize = opts.batchSize, ) - } - kType.isSubTypeOf() -> - (source as? Path)?.let { + kType.isSubTypeOf() -> DataFrame.readParquet( - it, + source as Path, nullability = opts.nullability, batchSize = opts.batchSize, ) - } - kType.isSubTypeOf() -> - (source as? 
File)?.let { + kType.isSubTypeOf() -> DataFrame.readParquet( - it, + source as File, nullability = opts.nullability, batchSize = opts.batchSize, ) - } - else -> null + else -> { + // return the exception without throwing it; cheaper + @Suppress("RedundantReturnKeyword") + return Result.failure(IllegalStateException("Cannot read source of type $kType as Parquet")) + } + } } - } override val testOrder: Int = 60_500 diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 57cdb342da..67b33dc628 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -57,7 +57,7 @@ public class Csv : DataFrameReadSource { val parseParallel: Boolean = DelimParams.PARSE_PARALLEL, ) : DataFrameReadOptions - override val supportedTypes: Set = + override val supportedReadingTypes: Set = setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) public companion object { @@ -73,48 +73,49 @@ public class Csv : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension != null && sourceInfo.extension !in EXTENSIONS) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } - override fun readDataFrameOrNull( + override fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrame<*>? { - val opts = (options ?: Options()) as Options - val kType = sourceInfo.kType - - val url: URL? = when { - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? 
File)?.toPath()?.toUri()?.toURL() - else -> null - } - if (url != null) { - return DataFrame.readCsv( - url = url, - delimiter = opts.delimiter, - header = opts.header, - charset = opts.charset, - colTypes = opts.colTypes, - skipLines = opts.skipLines, - readLines = opts.readLines, - parserOptions = opts.parserOptions, - ignoreEmptyLines = opts.ignoreEmptyLines, - allowMissingColumns = opts.allowMissingColumns, - ignoreExcessColumns = opts.ignoreExcessColumns, - quote = opts.quote, - ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, - trimInsideQuoted = opts.trimInsideQuoted, - parseParallel = opts.parseParallel, - ) - } - - return when { - kType.isSubTypeOf() -> - (source as? InputStream)?.let { stream -> + ): Result> = + runCatching { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return@runCatching DataFrame.readCsv( + url = url, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + @Suppress("RedundantReturnKeyword") + return@runCatching when { + kType.isSubTypeOf() -> { DataFrame.readCsv( - inputStream = stream, + inputStream = source as InputStream, delimiter = opts.delimiter, header = opts.header, charset = opts.charset, @@ -132,13 +133,16 @@ public class Csv : DataFrameReadSource { ) } - kType.isSubTypeOf() -> - (source as? 
String)?.let { text -> + kType.isSubTypeOf() -> { // early fail - if (opts.delimiter !in text) return null + if (opts.delimiter !in source as String) { + return Result.failure( + IllegalStateException("String does not contain delimiter '${opts.delimiter}'"), + ) + } DataFrame.readCsvStr( - text = text, + text = source, delimiter = opts.delimiter, header = opts.header, colTypes = opts.colTypes, @@ -155,9 +159,9 @@ public class Csv : DataFrameReadSource { ) } - else -> null + else -> return Result.failure(IllegalStateException("Cannot read source of type $kType as CSV")) + } } - } override val testOrder: Int = 20_000 diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index 49f3723a5e..9e70aee554 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -57,7 +57,7 @@ public class Tsv : DataFrameReadSource { val parseParallel: Boolean = DelimParams.PARSE_PARALLEL, ) : DataFrameReadOptions - override val supportedTypes: Set = + override val supportedReadingTypes: Set = setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) public companion object { @@ -73,48 +73,48 @@ public class Tsv : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension != null && sourceInfo.extension !in EXTENSIONS) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPE) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } - override fun readDataFrameOrNull( + override fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrame<*>? { - val opts = (options ?: Options()) as Options - val kType = sourceInfo.kType - - val url: URL? 
= when { - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() - else -> null - } - if (url != null) { - return DataFrame.readTsv( - url = url, - delimiter = opts.delimiter, - header = opts.header, - charset = opts.charset, - colTypes = opts.colTypes, - skipLines = opts.skipLines, - readLines = opts.readLines, - parserOptions = opts.parserOptions, - ignoreEmptyLines = opts.ignoreEmptyLines, - allowMissingColumns = opts.allowMissingColumns, - ignoreExcessColumns = opts.ignoreExcessColumns, - quote = opts.quote, - ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, - trimInsideQuoted = opts.trimInsideQuoted, - parseParallel = opts.parseParallel, - ) - } - - return when { - kType.isSubTypeOf() -> - (source as? InputStream)?.let { stream -> + ): Result> = + runCatching { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? 
File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return@runCatching DataFrame.readTsv( + url = url, + delimiter = opts.delimiter, + header = opts.header, + charset = opts.charset, + colTypes = opts.colTypes, + skipLines = opts.skipLines, + readLines = opts.readLines, + parserOptions = opts.parserOptions, + ignoreEmptyLines = opts.ignoreEmptyLines, + allowMissingColumns = opts.allowMissingColumns, + ignoreExcessColumns = opts.ignoreExcessColumns, + quote = opts.quote, + ignoreSurroundingSpaces = opts.ignoreSurroundingSpaces, + trimInsideQuoted = opts.trimInsideQuoted, + parseParallel = opts.parseParallel, + ) + } + + when { + kType.isSubTypeOf() -> { DataFrame.readTsv( - inputStream = stream, + inputStream = source as InputStream, delimiter = opts.delimiter, header = opts.header, charset = opts.charset, @@ -132,13 +132,16 @@ public class Tsv : DataFrameReadSource { ) } - kType.isSubTypeOf() -> - (source as? String)?.let { text -> + kType.isSubTypeOf() -> { // early fail - if (opts.delimiter !in text) return null + if (opts.delimiter !in source as String) { + return Result.failure( + IllegalStateException("String does not contain delimiter '${opts.delimiter}'"), + ) + } DataFrame.readTsvStr( - text = text, + text = source, delimiter = opts.delimiter, header = opts.header, colTypes = opts.colTypes, @@ -155,9 +158,9 @@ public class Tsv : DataFrameReadSource { ) } - else -> null + else -> return Result.failure(IllegalStateException("Cannot read source of type $kType as TSV")) + } } - } override val testOrder: Int = 30_000 diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index 543b30b333..b4efa33926 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -82,7 +82,7 @@ public class ExcelNEW : DataFrameReadSource { // 
String reference paths are normalized to URL by readSourceImpl, so no String entry here; // Excel is binary, so raw String content isn't a meaningful input either. - override val supportedTypes: Set = + override val supportedReadingTypes: Set = setOf( typeOf(), typeOf(), @@ -108,42 +108,43 @@ public class ExcelNEW : DataFrameReadSource { if (ext != null && ext !in EXTENSIONS) return false val mime = sourceInfo.mimeType?.lowercase() if (mime != null && mime !in MIME_TYPES) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } - override fun readDataFrameOrNull( + override fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrame<*>? { - val opts = (options ?: Options()) as Options - val kType = sourceInfo.kType - - val url: URL? = when { - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() - else -> null - } - if (url != null) { - return DataFrame.readExcel( - url = url, - sheetName = opts.sheetName, - skipRows = opts.skipRows, - columns = opts.columns, - stringColumns = opts.stringColumns, - rowsCount = opts.rowsCount, - nameRepairStrategy = opts.nameRepairStrategy, - firstRowIsHeader = opts.firstRowIsHeader, - parseEmptyAsNull = opts.parseEmptyAsNull, - ) - } + ): Result> = + runCatching { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? 
File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return@runCatching DataFrame.readExcel( + url = url, + sheetName = opts.sheetName, + skipRows = opts.skipRows, + columns = opts.columns, + stringColumns = opts.stringColumns, + rowsCount = opts.rowsCount, + nameRepairStrategy = opts.nameRepairStrategy, + firstRowIsHeader = opts.firstRowIsHeader, + parseEmptyAsNull = opts.parseEmptyAsNull, + ) + } - return when { - kType.isSubTypeOf() -> - (source as? InputStream)?.let { stream -> + @Suppress("RedundantReturnKeyword") + return@runCatching when { + kType.isSubTypeOf() -> { DataFrame.readExcel( - inputStream = stream, + inputStream = source as InputStream, sheetName = opts.sheetName, skipRows = opts.skipRows, columns = opts.columns, @@ -155,10 +156,9 @@ public class ExcelNEW : DataFrameReadSource { ) } - kType.isSubTypeOf() -> - (source as? Workbook)?.let { wb -> + kType.isSubTypeOf() -> { DataFrame.readExcel( - wb = wb, + wb = source as Workbook, sheetName = opts.sheetName, skipRows = opts.skipRows, columns = opts.columns, @@ -170,11 +170,10 @@ public class ExcelNEW : DataFrameReadSource { ) } - kType.isSubTypeOf() -> - (source as? Sheet)?.let { sheet -> + kType.isSubTypeOf() -> { // readExcel(Sheet) has no sheetName parameter — the sheet is already selected. 
DataFrame.readExcel( - sheet = sheet, + sheet = source as Sheet, columns = opts.columns, formattingOptions = opts.stringColumns?.toFormattingOptions(), skipRows = opts.skipRows, @@ -185,9 +184,9 @@ public class ExcelNEW : DataFrameReadSource { ) } - else -> null + else -> return Result.failure(IllegalStateException("Cannot read source of type $kType as Excel")) + } } - } override val testOrder: Int = 40_000 diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt index b78be745f1..1bc770368e 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt @@ -80,7 +80,7 @@ public class Jdbc2 : DataFrameReadSource { val resultSetConnection: Connection? = null, ) : DataFrameReadOptions - override val supportedTypes: Set = + override val supportedReadingTypes: Set = setOf( typeOf(), typeOf(), @@ -90,102 +90,127 @@ public class Jdbc2 : DataFrameReadSource { override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { if (options != null && options !is Options) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } - override fun readDataFrameOrNull( + override fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrame<*>? 
{ - val opts = (options ?: Options()) as Options - return when (source) { - is ResultSet -> when { - opts.dbType != null -> - DataFrame.readResultSet(source, opts.dbType, opts.limit, opts.inferNullability) - - opts.resultSetConnection != null -> - DataFrame.readResultSet( - source, - opts.resultSetConnection, - opts.limit, - opts.inferNullability, + ): Result> = + runCatching { + val opts = (options ?: Options()) as Options + @Suppress("RedundantReturnKeyword") + return@runCatching when (source) { + is ResultSet -> when { + opts.dbType != null -> + DataFrame.readResultSet(source, opts.dbType, opts.limit, opts.inferNullability) + + opts.resultSetConnection != null -> + DataFrame.readResultSet( + source, + opts.resultSetConnection, + opts.limit, + opts.inferNullability, + ) + + // Without dbType or a connection we can't read a ResultSet — fall through. + else -> return Result.failure( + IllegalArgumentException( + "ResultSet read requires either Options.dbType or Options.resultSetConnection", + ), ) + } + + is Connection -> { + val query = opts.sqlQueryOrTableName + ?: return Result.failure( + IllegalArgumentException("Connection read requires Options.sqlQueryOrTableName"), + ) + source.readDataFrame( + sqlQueryOrTableName = query, + limit = opts.limit, + inferNullability = opts.inferNullability, + dbType = opts.dbType, + strictValidation = opts.strictValidation, + configureStatement = opts.configureStatement, + ) + } + + is DataSource -> { + val query = opts.sqlQueryOrTableName + ?: return Result.failure( + IllegalArgumentException("DataSource read requires Options.sqlQueryOrTableName"), + ) + source.readDataFrame( + sqlQueryOrTableName = query, + limit = opts.limit, + inferNullability = opts.inferNullability, + dbType = opts.dbType, + strictValidation = opts.strictValidation, + configureStatement = opts.configureStatement, + ) + } + + is DbConnectionConfig -> { + val query = opts.sqlQueryOrTableName + ?: return Result.failure( + 
IllegalArgumentException("DbConnectionConfig read requires Options.sqlQueryOrTableName"), + ) + source.readDataFrame( + sqlQueryOrTableName = query, + limit = opts.limit, + inferNullability = opts.inferNullability, + dbType = opts.dbType, + strictValidation = opts.strictValidation, + configureStatement = opts.configureStatement, + ) + } - // Without dbType or a connection we can't read a ResultSet — fall through. - else -> null - } - - is Connection -> opts.sqlQueryOrTableName?.let { - source.readDataFrame( - sqlQueryOrTableName = it, - limit = opts.limit, - inferNullability = opts.inferNullability, - dbType = opts.dbType, - strictValidation = opts.strictValidation, - configureStatement = opts.configureStatement, - ) - } - - is DataSource -> opts.sqlQueryOrTableName?.let { - source.readDataFrame( - sqlQueryOrTableName = it, - limit = opts.limit, - inferNullability = opts.inferNullability, - dbType = opts.dbType, - strictValidation = opts.strictValidation, - configureStatement = opts.configureStatement, - ) - } - - is DbConnectionConfig -> opts.sqlQueryOrTableName?.let { - source.readDataFrame( - sqlQueryOrTableName = it, - limit = opts.limit, - inferNullability = opts.inferNullability, - dbType = opts.dbType, - strictValidation = opts.strictValidation, - configureStatement = opts.configureStatement, - ) + else -> return Result.failure(IllegalStateException("Unsupported JDBC source type: ${source::class}")) } - - else -> null } - } - override fun readDataFrameSchemaOrNull( + override fun readDataFrameSchema( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrameSchema? { - val opts = (options ?: Options()) as Options - return when (source) { - // ResultSet has a true zero-row metadata-only path. 
- is ResultSet -> when { - opts.dbType != null -> - DataFrameSchema.readResultSet(source, opts.dbType) - - opts.resultSetConnection != null -> - DataFrameSchema.readResultSet(source, extractDBTypeFromConnection(opts.resultSetConnection)) - - else -> null - } - - is Connection -> opts.sqlQueryOrTableName?.let { - source.readDataFrameSchema(sqlQueryOrTableName = it, dbType = opts.dbType) - } - - is DataSource -> opts.sqlQueryOrTableName?.let { - source.readDataFrameSchema(sqlQueryOrTableName = it, dbType = opts.dbType) + ): Result = + runCatching { + val opts = (options ?: Options()) as Options + when (source) { + // ResultSet has a true zero-row metadata-only path. + is ResultSet -> when { + opts.dbType != null -> + DataFrameSchema.readResultSet(source, opts.dbType) + + opts.resultSetConnection != null -> + DataFrameSchema.readResultSet(source, extractDBTypeFromConnection(opts.resultSetConnection)) + + else -> error("ResultSet schema read requires either Options.dbType or Options.resultSetConnection") + } + + is Connection -> { + val query = opts.sqlQueryOrTableName + ?: error("Connection schema read requires Options.sqlQueryOrTableName") + source.readDataFrameSchema(sqlQueryOrTableName = query, dbType = opts.dbType) + } + + is DataSource -> { + val query = opts.sqlQueryOrTableName + ?: error("DataSource schema read requires Options.sqlQueryOrTableName") + source.readDataFrameSchema(sqlQueryOrTableName = query, dbType = opts.dbType) + } + + is DbConnectionConfig -> { + val query = opts.sqlQueryOrTableName + ?: error("DbConnectionConfig schema read requires Options.sqlQueryOrTableName") + source.readDataFrameSchema(sqlQueryOrTableName = query, dbType = opts.dbType) + } + + else -> error("Unsupported source type: ${source::class}") } - - is DbConnectionConfig -> opts.sqlQueryOrTableName?.let { - source.readDataFrameSchema(sqlQueryOrTableName = it, dbType = opts.dbType) - } - - else -> null } - } override val testOrder: Int = 50_000 diff --git 
a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 7a1e9f8532..630e6543b6 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -45,7 +45,7 @@ public class Json : DataFrameReadSource { val unifyNumbers: Boolean = true, ) : DataFrameReadOptions - override val supportedTypes: Set = + override val supportedReadingTypes: Set = setOf( typeOf(), typeOf(), @@ -69,27 +69,56 @@ public class Json : DataFrameReadSource { if (options != null && options !is Options) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } @OptIn(ExperimentalSerializationApi::class) - override fun readDataFrameOrNull( + override fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrame<*>? { - val opts = (options ?: Options()) as Options - val kType = sourceInfo.kType - - val url: URL? = when { - kType.isSubTypeOf() -> source as? URL - kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() - kType.isSubTypeOf() -> (source as? File)?.toPath()?.toUri()?.toURL() - else -> null - } - if (url != null) { - return DataFrame.readJson( - url = url, + ): Result> = + runCatching { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + val url: URL? = when { + kType.isSubTypeOf() -> source as? URL + kType.isSubTypeOf() -> (source as? Path)?.toUri()?.toURL() + kType.isSubTypeOf() -> (source as? 
File)?.toPath()?.toUri()?.toURL() + else -> null + } + if (url != null) { + return@runCatching DataFrame.readJson( + url = url, + header = opts.header, + typeClashTactic = opts.typeClashTactic, + keyValuePaths = opts.keyValuePaths, + unifyNumbers = opts.unifyNumbers, + ) + } + + val element: JsonElement = when { + kType.isSubTypeOf() -> + Json.decodeFromStream(source as InputStream) + + kType.isSubTypeOf() -> { + if ((source as String).isNotJson()) { + return Result.failure( + IllegalArgumentException("Source string is not valid JSON"), + ) + } + Json.decodeFromString(source) + } + + kType.isSubTypeOf() -> + source as JsonElement + + else -> return Result.failure(IllegalStateException("Unsupported JSON source type: $kType")) + } + + return@runCatching readJsonImpl( + parsed = element, header = opts.header, typeClashTactic = opts.typeClashTactic, keyValuePaths = opts.keyValuePaths, @@ -97,31 +126,6 @@ public class Json : DataFrameReadSource { ) } - val element: JsonElement = when { - kType.isSubTypeOf() -> - (source as? InputStream)?.let { Json.decodeFromStream(it) } - - kType.isSubTypeOf() -> - (source as? String)?.let { - if (it.isNotJson()) return null - Json.decodeFromString(it) - } - - kType.isSubTypeOf() -> - source as? 
JsonElement - - else -> null - } ?: return null - - return readJsonImpl( - parsed = element, - header = opts.header, - typeClashTactic = opts.typeClashTactic, - keyValuePaths = opts.keyValuePaths, - unifyNumbers = opts.unifyNumbers, - ) - } - override val testOrder: Int = 10_000 override fun toString(): String = "Json" diff --git a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt index ef7e087916..1f6363a22b 100644 --- a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt +++ b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt @@ -22,13 +22,12 @@ import kotlin.reflect.typeOf * [DataFrameReadSource] for OpenAPI specifications. * * OpenAPI doesn't produce a `DataFrame` or a single `DataFrameSchema` — its output is a multi-marker code - * blob (interfaces + enums + typealiases). Only [readDataSchemaCodeOrNull] is overridden; the DataFrame - * and Schema methods return `null` (via the interface's defaults), so calling - * `DataFrame.readSource(openapiFile)` falls through to JSON, while `CodeString.readSource(openapiFile, name)` - * dispatches here. + * blob (interfaces + enums + typealiases). Only [readDataSchemaCode] is overridden; the DataFrame + * and Schema methods return a failed [Result], so calling `DataFrame.readSource(openapiFile)` falls + * through to JSON, while `CodeString.readSource(openapiFile, name)` dispatches here. * * `.yaml`/`.yml` files are unambiguously OpenAPI; `.json` files are disambiguated at read time by - * [isOpenApiStr] returning null early when the JSON isn't actually an OpenAPI spec, letting the framework + * [isOpenApiStr] failing early when the JSON isn't actually an OpenAPI spec, letting the framework * fall through to the JSON format for plain data. 
*/ public class OpenApi2 : DataFrameReadSource { @@ -41,7 +40,7 @@ public class OpenApi2 : DataFrameReadSource { val visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC, ) : DataFrameReadOptions - override val supportedTypes: Set = + override val supportedReadingTypes: Set = setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) public companion object { @@ -67,81 +66,89 @@ public class OpenApi2 : DataFrameReadSource { val ext = sourceInfo.extension?.lowercase() if (ext != null && ext !in EXTENSIONS) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false - return supportedTypes.any { sourceInfo.kType.isSubtypeOf(it) } + return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } // OpenAPI doesn't produce a DataFrame. - override fun readDataFrameOrNull( + override fun readDataFrame( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrame<*>? = null + ): Result> = Result.failure(UnsupportedOperationException("OpenAPI does not produce a DataFrame")) // ...nor a single DataFrameSchema, it can produce enums, typealiases, etc. - // so it only supports readDataSchemaCodeOrNull() - override fun readDataFrameSchemaOrNull( + // so it only supports readDataSchemaCode() + override fun readDataFrameSchema( source: Any, sourceInfo: DataSourceInfo, options: DataFrameReadOptions?, - ): DataFrameSchema? = null + ): Result = + Result.failure(UnsupportedOperationException("OpenAPI does not produce a single DataFrameSchema")) - override fun readDataSchemaCodeOrNull( + override fun readDataSchemaCode( source: Any, sourceInfo: DataSourceInfo, name: String, options: DataFrameReadOptions?, - ): CodeString? { - val opts = (options ?: Options()) as Options - val kType = sourceInfo.kType - - // Resolve to OpenAPI-spec text, returning null if the content isn't OpenAPI. - val text: String = when { - kType.isSubtypeOf(typeOf()) -> { - val url = (source as? 
URL) ?: return null - if (!isOpenApi(url)) return null - url.readText() + ): Result = + runCatching { + val opts = (options ?: Options()) as Options + val kType = sourceInfo.kType + + // Resolve to OpenAPI-spec text, returning null if the content isn't OpenAPI. + val text: String = when { + kType.isSubtypeOf(typeOf()) -> { + if (!isOpenApi(source as URL)) { + return Result.failure(IllegalStateException("URL does not point to an OpenAPI spec")) + } + source.readText() + } + + kType.isSubtypeOf(typeOf()) -> { + if (!isOpenApi(source as Path)) { + return Result.failure(IllegalStateException("Path does not point to an OpenAPI spec")) + } + source.readText() + } + + kType.isSubtypeOf(typeOf()) -> { + if (!isOpenApi((source as File).toPath())) { + return Result.failure(IllegalStateException("File does not point to an OpenAPI spec")) + } + source.readText() + } + + kType.isSubtypeOf(typeOf()) -> { + if (!isOpenApiStr(source as String)) { + return Result.failure(IllegalStateException("String content is not an OpenAPI spec")) + } + source + } + + kType.isSubtypeOf(typeOf()) -> { + val text = (source as InputStream).bufferedReader().readText() + if (!isOpenApiStr(text)) { + return Result.failure(IllegalStateException("InputStream content is not an OpenAPI spec")) + } + text + } + + else -> error("Unsupported source type: $kType") } - kType.isSubtypeOf(typeOf()) -> { - val path = (source as? Path) ?: return null - if (!isOpenApi(path)) return null - path.readText() - } - - kType.isSubtypeOf(typeOf()) -> { - val file = (source as? File) ?: return null - if (!isOpenApi(file.toPath())) return null - file.readText() - } - - kType.isSubtypeOf(typeOf()) -> { - val text = (source as? String) ?: return null - if (!isOpenApiStr(text)) return null - text - } - - kType.isSubtypeOf(typeOf()) -> { - val text = (source as? 
InputStream)?.bufferedReader()?.readText() ?: return null - if (!isOpenApiStr(text)) return null - text - } - - else -> return null + CodeString( + readOpenApiAsString( + openApiAsString = text, + name = name, + auth = opts.auth, + options = opts.parseOptions, + extensionProperties = opts.extensionProperties, + generateHelperCompanionObject = opts.generateHelperCompanionObject, + visibility = opts.visibility, + ), + ) } - return CodeString( - readOpenApiAsString( - openApiAsString = text, - name = name, - auth = opts.auth, - options = opts.parseOptions, - extensionProperties = opts.extensionProperties, - generateHelperCompanionObject = opts.generateHelperCompanionObject, - visibility = opts.visibility, - ), - ) - } - // Run before Json (10_000) so .json files get the OpenAPI content check first. override val testOrder: Int = 9_000 From 3979f37da8c9b09f3ccffdedb838d2ce355ae218 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 26 May 2026 21:23:12 +0200 Subject: [PATCH 16/20] adding missing test files --- core/src/test/resources/petstore.yaml | 304 ++++++++++++++++++ core/src/test/resources/sample.xls | Bin 0 -> 5632 bytes core/src/test/resources/sample2.xlsx | Bin 0 -> 4808 bytes core/src/test/resources/test.arrow | Bin 0 -> 55328 bytes core/src/test/resources/test.feather | Bin 0 -> 56538 bytes core/src/test/resources/test.parquet | Bin 0 -> 28203 bytes ...s.kotlinx.dataframe.io.DataFrameReadSource | 1 + ...s.kotlinx.dataframe.io.DataFrameReadSource | 1 + 8 files changed, 306 insertions(+) create mode 100644 core/src/test/resources/petstore.yaml create mode 100644 core/src/test/resources/sample.xls create mode 100644 core/src/test/resources/sample2.xlsx create mode 100644 core/src/test/resources/test.arrow create mode 100644 core/src/test/resources/test.feather create mode 100644 core/src/test/resources/test.parquet create mode 100644 dataframe-jdbc/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource create mode 100644 
dataframe-openapi-generator/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource diff --git a/core/src/test/resources/petstore.yaml b/core/src/test/resources/petstore.yaml new file mode 100644 index 0000000000..89255de8e9 --- /dev/null +++ b/core/src/test/resources/petstore.yaml @@ -0,0 +1,304 @@ +# DEMO for DataFrame, this might differ from the actual API (it's updated a bit) +openapi: 3.0.0 +info: + version: 2.0.2 + title: APIs.guru + description: > + Wikipedia for Web APIs. Repository of API specs in OpenAPI format. + + + **Warning**: If you want to be notified about changes in advance please join our [Slack channel](https://join.slack.com/t/mermade/shared_invite/zt-g78g7xir-MLE_CTCcXCdfJfG3CJe9qA). + + + Client sample: [[Demo]](https://apis.guru/simple-ui) [[Repo]](https://github.com/APIs-guru/simple-ui) + contact: + name: APIs.guru + url: https://APIs.guru + email: mike.ralphson@gmail.com + license: + name: CC0 1.0 + url: https://github.com/APIs-guru/openapi-directory#licenses + x-logo: + url: https://apis.guru/branding/logo_vertical.svg +externalDocs: + url: https://github.com/APIs-guru/openapi-directory/blob/master/API.md +security: [ ] +tags: + - name: APIs + description: Actions relating to APIs in the collection +paths: + /list.json: + get: + operationId: listAPIs + tags: + - APIs + summary: List all APIs + description: > + List all APIs in the directory. + + Returns links to OpenAPI specification for each API in the directory. + + If API exist in multiple versions `preferred` one is explicitly marked. + + + Some basic info from OpenAPI spec is cached inside each object. + + This allows to generate some simple views without need to fetch OpenAPI spec for each API. 
+ responses: + "200": + description: OK + content: + application/json; charset=utf-8: + schema: + $ref: "#/components/schemas/APIs" + application/json: + schema: + $ref: "#/components/schemas/APIs" + /metrics.json: + get: + operationId: getMetrics + summary: Get basic metrics + description: > + Some basic metrics for the entire directory. + + Just stunning numbers to put on a front page and are intended purely for WoW effect :) + tags: + - APIs + responses: + "200": + description: OK + content: + application/json; charset=utf-8: + schema: + $ref: "#/components/schemas/Metrics" + application/json: + schema: + $ref: "#/components/schemas/Metrics" +components: + schemas: + APIs: + description: | + List of API details. + It is a JSON object with API IDs(`[:]`) as keys. + type: object + additionalProperties: + $ref: "#/components/schemas/API" + minProperties: 1 + example: + googleapis.com:drive: + added: 2015-02-22T20:00:45.000Z + preferred: v3 + versions: + v2: + added: 2015-02-22T20:00:45.000Z + info: + title: Drive + version: v2 + x-apiClientRegistration: + url: https://console.developers.google.com + x-logo: + url: https://api.apis.guru/v2/cache/logo/https_www.gstatic.com_images_icons_material_product_2x_drive_32dp.png + x-origin: + format: google + url: https://www.googleapis.com/discovery/v1/apis/drive/v2/rest + version: v1 + x-preferred: false + x-providerName: googleapis.com + x-serviceName: drive + swaggerUrl: https://api.apis.guru/v2/specs/googleapis.com/drive/v2/swagger.json + swaggerYamlUrl: https://api.apis.guru/v2/specs/googleapis.com/drive/v2/swagger.yaml + updated: 2016-06-17T00:21:44.000Z + v3: + added: 2015-12-12T00:25:13.000Z + info: + title: Drive + version: v3 + x-apiClientRegistration: + url: https://console.developers.google.com + x-logo: + url: https://api.apis.guru/v2/cache/logo/https_www.gstatic.com_images_icons_material_product_2x_drive_32dp.png + x-origin: + format: google + url: https://www.googleapis.com/discovery/v1/apis/drive/v3/rest + 
version: v1 + x-preferred: true + x-providerName: googleapis.com + x-serviceName: drive + swaggerUrl: https://api.apis.guru/v2/specs/googleapis.com/drive/v3/swagger.json + swaggerYamlUrl: https://api.apis.guru/v2/specs/googleapis.com/drive/v3/swagger.yaml + updated: 2016-06-17T00:21:44.000Z + API: + description: Meta information about API + type: object + required: + - added + - preferred + - versions + properties: + added: + description: Timestamp when the API was first added to the directory + type: string + format: date-time + preferred: + description: Recommended version + type: string + versions: + description: List of supported versions of the API + type: object + additionalProperties: + $ref: "#/components/schemas/ApiVersion" + minProperties: 1 + additionalProperties: false + ApiVersion: + type: object + required: + - added + # - updated apparently not required! + - swaggerUrl + - swaggerYamlUrl + - info + - openapiVer + properties: + added: + description: Timestamp when the version was added + type: string + format: date-time + updated: # apparently not required! 
+ description: Timestamp when the version was updated + type: string + format: date-time + swaggerUrl: + description: URL to OpenAPI definition in JSON format + type: string + format: url + swaggerYamlUrl: + description: URL to OpenAPI definition in YAML format + type: string + format: url + info: + description: Copy of `info` section from OpenAPI definition + type: object + minProperties: 1 + externalDocs: + description: Copy of `externalDocs` section from OpenAPI definition + type: object + minProperties: 1 + openapiVer: + description: OpenAPI version + type: string + additionalProperties: false + + Metrics: + description: List of basic metrics + type: object + required: + - numSpecs + - numAPIs + - numEndpoints + - unreachable + - invalid + - unofficial + - fixes + - fixedPct + - datasets + - stars + - issues + - thisWeek + properties: + numSpecs: + description: Number of API specifications including different versions of the + same API + type: integer + minimum: 1 + numAPIs: + description: Number of APIs + type: integer + minimum: 1 + numEndpoints: + description: Total number of endpoints inside all specifications + type: integer + minimum: 1 + unreachable: + description: Number of unreachable specifications + type: integer + minimum: 0 + invalid: + description: Number of invalid specifications + type: integer + minimum: 0 + unofficial: + description: Number of unofficial specifications + type: integer + minimum: 0 + fixes: + description: Number of fixes applied to specifications + type: integer + minimum: 0 + fixedPct: + description: Percentage of fixed specifications + type: number + minimum: 0 + maximum: 100 + datasets: + description: An overview of the datasets used to gather the APIs + type: array + items: + description: A single metric per dataset + type: object + required: + - title + - data + properties: + title: + description: Title of the metric + type: string + data: + description: Value of the metric per dataset + type: object + 
additionalProperties: + type: integer + minimum: 0 + stars: + description: Number of stars on GitHub + type: integer + minimum: 0 + issues: + description: Number of issues on GitHub + type: integer + minimum: 0 + thisWeek: + description: Number of new specifications added/updated this week + type: object + required: + - added + - updated + properties: + added: + description: Number of new specifications added this week + type: integer + minimum: 0 + updated: + description: Number of specifications updated this week + type: integer + minimum: 0 + additionalProperties: false + example: + numSpecs: 1000 + numAPIs: 100 + numEndpoints: 10000 + unreachable: 10 + invalid: 10 + unofficial: 10 + fixes: 10 + fixedPct: 10 + datasets: + - title: providerCount + data: + "a.com": 10 + "b.com": 20 + "c.com": 30 + stars: 1000 + issues: 100 + thisWeek: + added: 10 + updated: 10 diff --git a/core/src/test/resources/sample.xls b/core/src/test/resources/sample.xls new file mode 100644 index 0000000000000000000000000000000000000000..fcdced4e3435c0f7e54e6463062642bcdca0ae29 GIT binary patch literal 5632 zcmeHLU2IfU5T3g~<@SemTUu0*dTH#^($-ioh%wlu1(gJ(DK!{FqTB85k8Hc7lps;# zQs7Mz4G+GM7?QpajTTJwL5=O34;l>#N%5)pM)YAZ7(+t4e&5`?>1ElaThm}LJDWLY z?wmPi=FFKh=bZiJ=jyo&m)2dEEO=i0vXCy75)a=1ZDzhhB0kXMEu_=w3>#z-9wHCS zyS8!-16af$kZZmQNM5O1KSWb1nz{@R^hg4~aj|5#jG`Ws6Ik)WVJ#xYtFDlq-h5Gr z{8BPS{fi!0%2#an`k(omdE5`o-v7+?-2Y|3a$p5;HLwy`1;h~`)xa9y8elE34!9Qh z2=GzhW59Jl>U`X-Nk0MFa=#ngdT|y->Opy+)Wj;51m&C6KQiTO?!S(RmJ*! zAvwa>=!(W;)`NmhwIOOSHm4D!J3tw79^6X#Lb=~+xkIB5LJ_gHqvDO^mJw0g&uZDA zWhF{OzScg!DyJQB`2*zB?|nK}!avE%IrY@^2N!r?+y+=i_tKT=M^4{v>>Aae6Lf{M_RBA(hQ<&ckob!{3~T zUwRM!oDX{{DQVWBq+jdmWQo=_$$-{tlB=|?O$O6%YV4jt{GVW=#!fvjWZUEIA7F19 z_{|t5P^9;)A-~J(%+RSGolZ%LnV4Icaln}J;#Mh7F@t}PyS*tV4P<Lbn|bd{X59;9EpfQV~KdXQ*BA(tS}{lFP;@KvknNt1)dEA9}g5K@r3S5 zJQWzSABJc!aM7vS;9-obw>wnE8Klb6`a;G3}dz` z-00Zkp_@RTYMY&Cn@x2zd*}<&fD^SJ)8lVKT8&mC(?3%i@+!tQPz(`VTwyy7Tbc2B3n0_J7p7=qrOJM+n(xAzZ)KK;m)mQ7z? 
zqu1XB(ur)R;k=(kfb`5>An$3f0lEMBfxNFB0n*4D^CI;w{S4{|P_C38q^#2X21J0{ zrrwzv>Y8O9kZYaW_8tcf@*c=%KlS;p{&e;HQ|m+LK8F3Rw?E$nUNDPy2s~yd4o+kw zu`g}S$47G!R>%V@`To=O%i4pv%;Z~R+TGv>@%kK-!_uek(8IaJ@5`$}v@l+fWHbld zSJ=~dSu+u1t_zw+ut)a8hPONtg^|?H`7r+w3hBkXw8s@Rpf@gdA>DGyltI6v*(q}h zWidD4n{PEjP!7m3%zp&8UcUYJ;9r0NT@!M~aT`v6lOLK~{mk%%&YwBxGk3_K?1F}v h+FpFD-~YA#e*m$oEe8Mq literal 0 HcmV?d00001 diff --git a/core/src/test/resources/sample2.xlsx b/core/src/test/resources/sample2.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..db7ef6786da71704a26c0fb5b7fcddbf8eb78992 GIT binary patch literal 4808 zcmaJ_1z42Z)&}YBMq0Y0!6Y0Vx@)9|kY)zy?gI>sbb|t1d(T4qPNq*Rq`0>a%_jBf#?N0#*R;{=Ym z0YaipWNJeSS~|J(s*UzS?x)_WxZD$<>V*iGtUB$Q=+xGi_v^zUS&t~zLMkqx(sj6i z*o&qFm5om}pW8;bY#yPhE8*Z;l?HuSKrQ>;Ei}~szY7cr>I)Cl98M5Bb0;S|K@SIe zm{yKDR~kJX@dEt zG*831OuLQS?fQ8-107~<_tonKL#94%O9lr_6*#uh_z2 zgCyeITaO8!#dOW-=2zRl;|~ofu3(Logj*X^AVWT7p8lyfWdcjt^YORQ?2j?%E5fp^ z9W566>fReKv!p3fxq!QPpZ1u?|FKP>GOzsL#2&{2*uMuf#yax z(GV7q-+M{*^?k6{N*Xe2%{mX?HPjgovp7jbh1`;UVe$jRrr@1t_R;gv*k?uX65= zrJnWFNf>v%R8r9dW6PXN@!Z`pRT%JV$^ndlC1BViGK@qWyKW*U!t`p`8xHBC3kfPB zYW@jjjEEpV;M4j1Jf{`(eq^Yw_cq6Sw9_&EGo9<0-qto z7~|h*REYW%^=*a5CeDi*#tZ-8Nz`9qI>GS z$~ng;ndl3FUgW?rJC_?D4Ui1(ja2lU=u{(p*4LQJTZ2V?ho4B-v3##q-9vk_T`Ju< zKbw_7lF(=(;+S?=V4C4v6vDFkV;!s@<(C-X7uM=E>Z*|5qG)KVVmWb`IY!2HL18WS zp;0z0Gi>FE3wdZX3CD1%Ye&VuZp&`Wz=@t`)Amab5t zUyr+2KyKDmahjGQZQiLQbzOg9_to)Xy0%;VyUq8H>2X{du#Nyx>`}NM&4B%m`R+GBeLRTP67Y)akb&=*HJh^bn)%CVN9zubu(?dG4}z6u61emNkAiAzYqszb6Lx>5X(oyV6u!L+BvyxnP)u{d~tEo z3+KL{`hhaw@6RhV+|U>yy1+G_VHX?2vsbig=Bukon14($-^JiL7hmQ+s5Zg7T;h*0 zCjF$-&GCmAJ+b#@W7YX2VztR~NFU39zEUVmd?``Vb0$wsvo176Pt-(;C`!s4JSzwP zcJRdC$O*PJ6i2%M@^ruLd`Ch&YS*0mGmN~jgQVR^wh0h?pvt1yL$hn1cy8}38$;7C zZ)cm-W4HvG3@RiFRGbG)eupOn7cg)_zfFpX*F|v?7%%tB@NedBM=%A-ITM#2j4xhx za1ZBo0biH<(<8lyetJGEkx&@6Qkp=zrgDPv)5XdBGXc>J5g|72Gd-7*$1bsHGUn85 zR>fpH#O&XS$*jZ4a#Yy|N!Y22$!MP9-)UC1V^0DhhTRK-@Uy&DTkhH)sVq*)!Ak_j 
zZoxccS5q@JL~&}qbgOZ2z14cE>0akFV2^p5Uw)I8Sa9AXTk;&-S{O+{f%NhDdOj=$ zDkyM0Pb|hgwVZ>FY)er=1vvNa`NvD(cq@e&o@B-}QNJ^c%GKz4lC71UL~!D*Snk<( zT8wP5dd&kdjt$>n&_$5c?4H4g@MpuH(kn|e6yzNQ+|g=%)E4$SrX-8iQHr2yv(lMy4Xv*Ai7F_ny7i-dK7Wp$NQ)Br~fPc{}O{YWzS#2 zFkZ#3Q~{1J#_5iQjmEn9hhg5IsMG~6YTF!AovPmlqGZ@D#`IQ-?H<=J^!ga?N(#fy$; z#+oni35nnwI-IUPSbfBHdy>7sGB`(5!X59ki51h17l9Mrbh*0U)=!Ru{SRqR$}8t) zeQsSvMs4@hYZSHaJO3yJDgQ<5Hf9h@u$C+2g`@Q^cK3}@!S4_z4L#0g8<=~Ilq=-e zklV%d3W$qw$W0qpHat86$j;RJH0_+fxgfKuHZx}i%4>aQiF?j?A4Xr1a&UngR8!PG z7_Z(*LFZMYHDHj$T^+`=R+N}98`2J;qA6UOF~^snNdxpL$=%xxN$upHZg|*h`XJER zrKl_qt2&{z(&LA1!6d`?v~`c_3jP%0?pjt_nl(V5eUgv-W&06M|NhseUz*pt81s)} z)H3!^zrWYKD0)BxJ6WhhoSdOT7ETb$U#eHr$SFve(s#a+#W!3Irp3?(2Kl7-k&AG*i@P})NF;85^eM-4VaA~AUZ2s;`og1yESjV{UjKo2XYG<;bzbX*?4@No=*5DmzElg5}z-c0*4vOMM$ zs7X$~6waPAFfo#vKzBRwnD)|Zp4QE!Xe_K5jmFxvj=H~r!EGfrq!TOgbWY$t)`pXA zIry1)F@I{clKWwKz}Di*eaDp_G5yf(9Cj5RJf5`YuyKakWN}7?gX2*k$TdLai`C^G z-i#YNQQJq|$y*Js%onGz8RGjrv-dES$*M$LXb< zSW+9wZsCDVu%Xp-O1x054i6j*X?Zq~UypGC38{uWuSk1QAR}34*eFM=>66m8$hV@W zHdt14B;#s{uXa~f?b}%$HXDkoSfsE@329hP*&S6+Oy2D8ueBaN%$EP=A`wIcU~vXi zF0X5(YJe70w5*rezRKeQlmhE1TE=q7jQwJVTM(sZrAElwjFj`9hWTS7mzt^})|SU? 
zNW|ymG8iui;A9qU?7jR;&G4UW@Nc5R;DZX*-(#5q^#w@Y$7v6(B+B$eV5fOcFcUn@^+$5&o za2f<3jIL)Yucvb460jFmK=y0vl!W^1W%{W>1P|Mdkxh5 z&YQeep4BTIMJ;&5_9JjmItW;kACym)cg@{PF5!ug~HC#Q|oJ%^-gD65*u9~-At zaxd*(zE+gNLeLiTxiMMEWhVU+h=!%bHdbW9FKNvE3c|5Rs}csNUD3n3+8Oj)lxWw7 z4mZ2*t3!wXv>QhdzdPP+y{?+_YokL=^;?VnyYo%ud)1v?n+l2$QO?)B+V8G6S@TL7 zUmF4Tzvulgq5S(OHyP%NVXqAz|JNvg!?(YWbF+Y7@zJ$$prU!Tkl)$q_YrQ^m#ad5 zZA66sqwN3gelzQ?iqN%Hp#uAF*7#RR`rZ2`GOjZ7+P)J1&pcIE!a_wE4Gj-jRzAv>Gla8T3Zkm8UMDxoLD=Q=;D(xXFGxPC5E8@w9`}uCZyB~Y)d(XOm-21!t_xxC|*V{MCeb(9Q zth4soXYV!aKmYvm&)8~0_y`KMMO{%tR2Sa(3!zFi#06sB__rGWD?}rTA-u+^8bNevkyhfFHPp^oM~P^aOY-G4{7V|HKY!Q9fGm|3RV-m^Xdq z6i3j6Srcc?78j6TA*u=&{y(Kc8h^y;eJD_#InB)JlM-gr|9R`5to)M*;iad~uaMq` z*U0Gu118Lz9v3&A(x0K%{{DaRSn>7sv7PpZ*Uz0YY4)tibA^c1^C?77c^Yr}Tz?I`%&` zZuZ1^_`baXN$rIvc&zd%9);#=|n2PKPF?+7Mb&Eh zXAAFFzMa}6KWw4ahw^k0Z3^XevauQNQsHm;&y8qrD5SFs=u1I}r$Va`jJWZ7?^@*> z6R83txEq(-_}7U0Q-wgtq%Q=)mtUXPzC3RHYs9IZQplIK8*$@vIUgv0o$;>`zhL}F z2jdSm$5Z%orgSuKH{v$VZ&qjXb|Y@=-%l!3BW~=!SlVvHjs3?zXnubqZtOqBZr*Oh zjr2COcNK#XH`2R;D{d9Sh#Tqi9p>#u+(=&%YTj+ zOCS>*0q4Lc;2ZEG@WS!g08l-72=oGvg0Wx*cnYin>0mF&0cXKw@HMy#@Uu<$fYzWJ zco+-?Pk<-ELXZqLfj!_&a0Yw~u7NuMO|rNTv;q%;o?r+V1Ezx{@Ek}3yTM^_8W`>C z$SHH@*&a!nI5R$Oip_3{X{K0!DHdpo1({;OrkKMN3o*rFR~ zGiOLM=SVYWNi*k3GiORO=PJOQs{nJZ0?fGzFy|`3oT~tHt^&-t3NYs?z?`c9bFKo- zxe7GrD$ty(Ky$7F&AAFR=PJ;gt3Y$E0?oM!H0LVFoU0&nu7b?D3Nq&^$egPnbFPBS zxe7ApD#)CxAakyQ&AAFT=PKBot6+1kg3Y-KHs>nXoU34Su7b_E3O47;Va}DqoGXVp zR}OQo9Ohg(%(-%ybLBAS%3;ox!~xnsene=gMi$mD8Lnr#V+nbFQ4`Tsh6Ta+-7HH0LVRoU2fCu0qYZ3N_~{)SRnO zbFMNfOKqN>6X&?s_f>O`~7h?_(2@*jX$N`0*6g0uLg9Ai@ zM34q@Kp`jvO>iOO0FfXOq=6hz2ueW{TtGQMBuE5lAO{qJQqTm~7!D8#jO&y99rHV3Qkj$oTex`O;L85qVP0D>1m4M(-h?wpeVlpMfn9N$}d1segTT| z3s96_fTH{Y6y+D7D8B$j`2{MkeojUCIThvS zRFt1nQGQNE`GqRVFH})}p^EYgRg_<-qWnS?qZLg+5Olt>}Bi>_DS}a?BCff zaD7Rk2YU>AA)D^IQaHk3*-x{#vEOE2 zVc%mn!}T_WP<9l10h_MPDZI(P%>I#W^aq;mL63-?JwI-;t?v}uQ0k7A?vKk`&3Am{ zt>zm*@>cWxAbG3#_K>{Qe78v6YQA|SZ#CablDC>~Ey-KW_rK(=(*1kOX3wQ1-QTyQ 
z_FP`l{eMep&m|`H4_H!rE;FhBz>?Z?sY(3{a%!o+!Lr$NC6oFiEU7(LGpWDAlG<}c zlln6(sXbRUslP)`E%kp`HhZpXQvZl0wdcAf^`BT$d#-Cz|B5BG=ej2KzgSXxu4_{N zjGS8Pud!_QT-T)j97}4?bxrE;v847~*QEX+OKQ(`P3kX_Q%n6vmd&2)n$*8!N$t6= zN&Qcj)Sl~_)IViO?YXW={a2RMp6i;_za^)Z`nxQfJ=Zm z&63)4U6cCTQA(!_FUJb{zgk`&vi}ekF=!rT-T)jN;$REe`(q5xvok5o0im`>zdU6 zX-Vz5u1Wo)meii>n$&-4NzMJKqbdab60UZmR{jA!2ZWEAJS%DWnzW34hfiE6xN^PZ z*J}0s+IDrg29F#co3MCg%FDZ6&pmaqu=wUre|Y=0=wR>Oe^^vZT%vpR#%-C|$Is<| z{>|+&uewcaor8LXMU0&q|I~_g={xr4^Q+`m�ALFTO!kW7qxuEq&UQwJ-L+&^61R9^`h$_6&D9``g02 zhn5U48j<&S<`XFsk|sq@3!7n|?Kf8}xbal}(%j|xXZh;q>f+Q7b;QoE8i*C2HWgDo zXekDqZYMe(?IgSpJ|w>16)fI+xrcZyrH@#%a-f*G_)#$|VT5=ncC_#rKSA6XF+~&% znjzlkJx6Q|ULfXoSt26aJR?Gyt`dIr){B37y(m5}ds!U2ogub-vqvoXe7_iLpm!ujr_;{KB( z#IJ`(i!b&~5XUp7h^=We#L_i$#DwJwgez%@Xfx{>;Wc@c_g7q~-)U$O(=ptXyY>uC}H?@_xGPb>VJEF7L7G@Vu_i~8npq`>n z=f0w?ElgBxGOWCA(Z8$5B|S0;cV`+MD_KpW+_w2m&+ml09R7)4HC~IVp8fIYCrWU) zp{G`mF#=cp=hh9rPQB#ew@!7TtM1PS^_a5+J=~M`zL8UmVY9cZb^Wd1eHeKbv#ZZu z3<37aYw=udUyST)`QpWy4bdyUsGjd745`d0diL?4Cb)Cp_+!harWl#FA@SO;ei*TE z=XS^O%`ozB*tD7NG#4Ub|KGCPw-n+^t*El+aQkZNwETyEz|e-0GhxpRYlD$?ZQr?Y zs4YeYxv%L#dHe-Dg2Y?xJYA4Z?{ zo#XGAi(wcSKg?KE*c+n(y59dzXdfYpSG`bUYhR4?`>xhd(H|od`!9cC(jyoJx#QQu za|38(`P>$r!-Pm3_rt`sgD`UP*rdHBgE11&@5-$asG#=5kpV{@#fXlNFD_^{T!{YK zBX2JW#|Zp&Z+m_97-n|d3J&W(0wVm;}|I&({%i$ zD2&_ATfZ|1w*fb{^!|R+6BtRj$ldMUSd4ruDw{igJnjrR<{W)z0!HR<`2LUf6ESM@ z&cy!7F&G&=?DJJWOu|UR{ShAxn}U&%wcc-(JynQV)B25R9E%YqXO3-8nl8lhwn5*1 z@gzo&yZ3kQ9fwg8SKG($n2C{-J+{7C9dkbRr8mA88;=`oH*;&d12gxY?1T7vN6fyWed-o`eyj9izW${ge>Dt|;p8%p%ME6+vKFe@GM+GdOyBs4ja`v4sd`5`8hVANge^!XW zb2dk8U5SyIA2v)E$rwr6bPyE1A+6k8wi}Q-9SXZK;XRX1~fgw4a6uI2%Nv&K+yRc2%Nv&K#Ya~b?3Q(z90Zqkl12G8(H0{L=1kUenAaH(n12GK-0_S-* z5IE1ffxvm*4a5u>2%PWTK;V4u24WTr1kU?zK+`_lK;XPjp@hB@3NHO$;lsAE?Q9o2 zoE^=MXS>-c>Fkj%LTR-Ru;020M$L$IfRLvrE`i z5B2@CZEQQ+#SUjjv*X!rb_zR#oyE>$=d+91C2Xqy`u^EAww>)_8?*nSIUdh;vs2g^ z>@0R3JD*+5E@9Jgr|+L_W82v-b~rnl9nW^NQ`i~oEOs6{pIyu@Vbk%h@1JdB+u1I5 
zI6Imh&vvs@*ct3Bb{;#QUCb_FQ#+&YpKa6K3jfs;&;gDgz#0nY~c;5MjR6<1^+3`_+pzz&cHz68I4X4No%3q*o> zAQfZ*>Lb4eYFEeABNzZCgXJItoB&tAufVSceu;yTAOWO+*TF?_6L{AYq66p;Vt^ZL z1INMVpbRwe##IA|0MuW-4(tc-fg7MkEsQe&eL*x>3buf^KoOuZ-i>PG3I~LPSzry= z2QGl?pjsVVK>!yR4;F)$!C&FR(wFtWQ+^LZ@uFZCf27-XZ#s$-8Ch5jMTP<@+on5d zK`4p{Z&*sxxbQ*e&q8tF3;59+9~+k~2|IO1-+34x5d|4vq`TyH0zMKhD`(b2woxFr z_UaCs(HGf7Q+2bvOS~wyyvsY|y9RaX|6`}_*KPlHKs}0=P5MfA-nB*bDV{vB$$jMT z;w;^M6NdOuy!h(3x--YR8&Ev{i)OxL=a@Hji_eBPr1*u$Z|Y7dT+xW)QKMQnCfhC_ z)qNvkWD|;KUAnD1DSTB^in~5)=STJ)c0zal#i;u!o<8J9-O=x@ZANkbLH^CjCFf7+ z&K)qW1;yQGf6*P*KNU9z@cB=7rQ<^s^*OIQBX44BipTc;O}G6-nvLRRJ+(IEykqa{ zPVPRnEycr+igsi_=Vsl-Il=8Io*A64JO1#K4^Z40SiJ*TWN+7f!QQZ%YrDL zx3PIJIk{nu?(o!b2gUsw{Ghvd{j(ty&#Z5AlH=FDrQ2C|R4B#8^Z(F&q1Ni|6irEP z--8@g^Q3P4Nyh;l161#+u2Vgyx=r<&>N3?|s=IVJQ5~gvNp+IyA=N#qcU0G?eo@_` z`b2ez>JQZ!swY%Os9sQApn5>xJ$>IcK;JKYxAc9|cSzqKeP{GN(RW1O3w;;#Jy6+G zc~eIZ@eAxlmcq=cmt3pO@O@he0nuKVn><59kZ}f&SnTFaQh$ zVPFs#42FQA;88FP3m~0Z)LjfPdi8k2?_!CW06+2~em4 zyg*e@`F;G)e;@c7^zVdB-wOr$ZYa?ALxGMN3Uu61pks#u9X}N47@|PO5d}JyDA4gl zfsQE(bX-xOV~YYEUliyVqd>_&<3F3;$1DXpZYj{QOM#AG3UmxppyQYV9m^Ex zc&0$dGzB`YDbTS^fsStqbc|D=lEmCr$EO%1v>62(6LW}j(-Z&22h}OfC9Az z6sSF*Ky3mAY8NO_+d#pk|6ATh(4T!d-j7Y~1qEs|C{VjWf!YoV)P7K)HiQDTBNV7D zp+M~k1!_|$P`g5b+7=4bzEGexh61%S6sWDCKM29sZ7#EtzI84W3KH{!NFOzYH?WMj zkv@HfdAkue(&x@HZ#Uvb`m%ZE?MB>4@0VoWZp4l4&PC?!M%;M+q-EyqM%>s=Tj{ro zQQpR@E8|8hRE3%qcV;tSe> z0MHjifEbVfmV;EV17w3!pa5J0cY#-Jqz7#Q-K*;hB0vmC0Lwut*a5P^DNq2efxEz~ z4$=dY9tA1Qx6sVFk>*oWu9sAC*{jO+k}8&lH2z&0?_RlHQpK{8#&1jG zwJX<4s#sRi_+x3j@&DXTljfCa(n5KfG;>Uoriy7&+pkHJ!8B>LJWZPJrAh1MY0`u)O`&q|XP&eNosRGPGOo+ize(xk=nH0f-hNv8u%Iv;4# z1Sd^eK~IyWFlo{ndYUvzNt0I5)1+xgnzW9dCQUTbq?tvUw3MDE%^lLD#q>03c9145 zr>9Bte>7=9Jxw}8XwoS{lg<&EG-*eZR@Kv_X*rs-uAU}MywRkU^)zW}jV7(Fr%97% zG-sE=srK)^Ii<-I{7W|87Kpl>+PW*1=n+tAQ7a298d^K zK@(i7IY1;x1Zf}#6oOLF1lM8?5D5}N8pr{KpcKgK?QOz!TQ2wv{0f?*Gpr{V2bO~E zAP>-h2ru0HXb1X(DPSeo3od}~L2caU>;i^@nP43_2=c)l;Om3FBQO%o2Wj94C<6Zk 
z&Ct=-1B?Yrz&3Ca6oV?bN7@$j1CznCU=KJCZh%_ol8Mq&vc-_GhU@_PVPJq7w>ey=o`hrPd1=tPFf$xBKQ}nTc!C(eh0}g-> z!9ReHANtTh1egmpf;T}S_yzdkMr|lC?*DpT$xpG6i((xY#WF66Ra_K{xG2_eQ7qx2 zSiwcLfQ#o+t6cw3Q9g?GgDTfQ^jvmPb9pUU+QJ(o}A`iGv&r*i#6&*f9O{-Njc zsa*fibNN)Rf9Sb<{`J>C^!)j8(^vHZ#g&I?2JsCJ>Mld6^5(Ve5(g{n@f7H>xnkYX3A<=fFmqyFMm)h<+ZQnj=@`u`NWP+i=hhfwtnsyeA!-yNNbs$Hn+ zq-u$GQ$Hknb^e-fWz0h|=I12QMufC->GQ|w&jOB@uq)SjFE5D7b>7R%;a_c?_W<+< zQ;1+Euv6LD>;m>(wlDgMDFm=1*a_@Zb~d|!eV6TvzGOW;JA$3SPGx7a3)pwrzUXV# z)3YPk3G7sMHoJg*mrecDdU|#QJAs|b&Sn>|@3MW-U#+KSN3avvsqAca0sAi77ya3K zdUgamft||EW*4yUvVGCtt*2*4uoKv+>}+-c`!3rT{o#6gb_6?toyyK;7qIWLeW}kJ zA%GphPGG09v)KjgyKG*?9lZ%-kCoys$x0**J%3ICjC*`f?(UkI5^Vmvxt9hiQywyDJQr>DF zg(+_}kI9s`nn!5LTcrV;md&0^OB&8;N$t73q`{t+)SgRB8VYJj?YYdPfufewo=Z&{ zMk=S4MwMDNd#+^Cm{Ut?&(%yCk!nfpxuQwqRV}GKS2bxgtDIUI+-lkExvoh=UoELU z*EMNitR=PQx+V>qwWRi3*Q7zUmeii>nl$8APA!eVwQTlW*QD{emeii>nlyUXlG<}! zlg9R1QhTmz(#T&qwKNdeve|Q8lZFXfQhTmz(ja0>YR`2|8e(io?YXW=1CTALJ=Zm9 zc(R;Y8m(;E?76NzXti+mhOIU6TfDTT*+jYtm3|Ikhyh+p^hnU6aOrTT*+jYtpE2OKQ(` zO&T+9N$t6=Nh8YT)Y1TR%Vy7YO&We~N$t6=NrTfZsXf;Yp@E&yrqo5h2gx zv!++nKgD%~Jnzr?{)+l1%?q@qSJXdg-k>$TqW&o^C**mCmiJd&P{{KTt?3o@PnxG_ zO|Phb(mY0MdPV({<~dr@D=sYLd63rhiu$LxwvgvdTHjw$|D<`9*7S<{C(XOGrdQNI z#U+M3Pt)@Lii-?+9;Y?EqW($qJgw;!^-r1yYE7@Gf6_crOM1n{hCGkdnqE==6xSQ_ zyi@D@E9###FV&h}QU9cQtJd_2`lq<;kmtEt-d}OyA0P^S}SOY&Y)yR>P-R`)P5t?ZTJ-4|ZXI*@+I{buaZ@VA}s*v@*tTXL}| zKkss8QOcF1;^=R}ZrE@6m585i{E~k!_s@)~$u;BaMBV3V=-Jo=3X?vM{xa;E{kq>xar?%d z{GW6GnelsamH6sWn4OGJ`TV;7J=a&iMxxY^lv0u*+gFc%hI)Ai4 z)H%CE+Pr_aDtC)Bl6Q!@z5bZE3pxOmr z@$K7w;@lgp#NJoii{$N{#q<~LV({}0(PddrQFlRK@%Oke@nOs`aWHD6NEtRp%;_I3 zBDzl%f%Z7js6&GIxp|T(Y_L=uskuU=|Gr8r{BgY)^ZkpW$KN)K7N5K#et&PT_%iQ+ zIGOW?$k=~WEZcEX#H61U{nouN+ON1Isz3FqxIX);I6viUk@>{;V%1~+5Kj*LSqypj zp6D7_Mbzt5Q`~M;09 zjjmh7cWrhU&2Q!N1S_8>Sou7`f9H9E;>N0K?ar^pNQ>_l$97(WC#m%R`M}z>SZQL% z%pXhE(TeQXog-85bdho6mmYZmPb@fAc)HmJtn}bFyw=i$&s6b+U&cu1uH@9ed!-T+wjzvYQDkg+wq`> znAK0+%fLz%d0u(ralCG6xv2W7omg3Y(T{^4*o_Bt6wOFU-h-7Q9N&EM!(JiIY#7^o 
z_^Vhs{Lba*?0s0tV%Vd58t=!diTls~kaPe~da2b*yYf0#;+U3`*gFfy|JiQGcO1g; z-*#`68gF1#R(I=x(+^{%i>sT~TznHNw)gNUc<2aLSWHiDx*-=Uag>U2rC37r+=zuc zMjyk9?o$8B2W=;?5=Pv$glA6TsWk77$-SP32WNEr`1e7luu{jfL;4*! zjg=^_olf>Siv=uPJwKd#4iCK8dbr`I=dn`8y{@CXzl$g1MD5`2&pjnp1!F`490Vnh#eV?wpUO+;si(pKC8+C5=@JdfzF)^F+V<$BL1cv6AT{ zvF{!E1S@^)D88@xr?lez=uu0Huu9~(OD}(gC0)868&ur?3#_!^cj|%6D_GGzvHOfV zU*hRAA7^LH`WsdP(YlmgD#lZD*6r*Od<_pGxz&7G`qx-da?r+e_rAeO9DD25oq!Wc zjq1-ncIrDkKqKb&va^j1$4VweU+&MojT4aL z@w<&n@T8m#AB8NugVj*(3|w;ME>;p5mUpW6Pk02^ey8`&pYc4=T8AF3@e5Wunbv8; z^fIjEarTv>i~qz*mu;K1vfsl>ukO@I8-Bw|BUfv`TKYRy#P9LU?a_Z?#hUbcfp6pM z>|Z)}q0I{`eT=wu;u)-l9Cc`NwePB7Rm}QV2Mw-)l}O@7tv^r`E3dr!!DS!(K5};) z&}?39`~ZLU#Dq_=8q3COj@{ksVI->S;LWY|@gR|{9qm|A7Au&rx>IK|T_YA0Xz!K+J=Im=6Py2m`SI z2J|>tHxLV9AfAGOSOf!F&eRRW5*W~l4vGdsZ>|%BaTYnTMvf$INJRdf;o$X?Wv!mJZY&ScFox#px=dttI z#q1Kc{uof&KR>?A#~MB8JD%-kr?4~FS?oM^KD(G*!Zsc!=<^7Fezu+MVu!P% z+3{>QJB6LW&SK}W^V!Ah61EsnzF!};ZEQQ+#SUjjv*X!rb_zR#oyE>$ z=d+91C2TQ>_s_Pm?Q9o2oE^=MXS>-c>jnG&`Q{ zW~Z<-*jem6c0Rk9UBVVac>io0+s<~e!`ad7c($9J!p>l4vGdsZ>|%Ba+juOe&(QLG z*w}Wqiyh96*4+v(?g?lPKb@r0z+q4Ta3i?#x$oZp_n!NnTc>=z@t>z^#a*Qz>-L^Mt2)K=fAp?FPEXjWJNnMUH7V{NU!=R_b^@v!x?^V6 zt3`I-+N(QkM&H^L_qlmRcTwyDR1p-wcMVV#@Wo8suiO6Z0DSHEAd_g>c@*%qMf5d` zVAr+5BX7d z^m}WYQQUu!e{*ul`BS=c2aIb$arfC@bcgj%ZAo#T(_N{(5=DK^>(0oV*qY+8y?@hf zKapmmcv(-a4LR@F`?{06Pi;%_@T29=fcA54Zb$LroZ$B4%;0?8@rR#$fa1=;>K(`; zd%Nxn_U;`insV^6?x=3F{V8rcP^%O9MweZ>v-b7sO!1^npXqjG&h0{R?~e7mlJj?G z>P~Op??H-3@4Twpzim=CikD>gK19xKeL%N++dw4vPCV_(6B^`e#EZo>|}KB*(9POSiM` zs8EWF=l`MmLao)^DVmbpz6Uv~=1JYQm80q0A#POrNq5#W>mH_fl2@l*WY@COy1m8t z-W1PYTBbYw_f!|fqZdEehwOjvyzY{PG5U|`+_K+vyBBQIf82(ZYW?YTKJzc?F8Xn* z{-ZA=p~?XI|JXa5b=%_|11Vm1yZlE|-pm>LkCWtE)d$i4htDYgQQ>#9`(TO}$9|$a z^SjwYC>}qx_E56(+g-ZFVoUK$!nad-H4y2CzN z_Bh3ThP8+y7hTNJoiXIG(G-t;?+4xXK`ZGzF3Qf^o*?H9IHo)K?5MF65AR>1+wb)1 zaTG7^^T2p=X5LBN@x8}Ppt$qIPrAj!>!T@t;aI1MA474Q^PjqJ3>MqNOpGEP!)^%o+leg{G9p2Iv zPjSC3pX)AeHh&JqGhezdfgJC*Pq%YZ|G5+wP5!3)!p4R3D4No+(R^}L>g&2~4F)Ar 
z{KopPb!XLIx`5(IYmGK1JywdP!mvFUklU*)SlM`^#HZm_W>V3?XoXu z2&nCC44QzZzz^IHs10onS^#P%TY=WV2HJqOfZDtE-~rG9bOh9fbpo9MwNqWegMiwi zhkzYWdlLWx0ktW?zyYY;ptgeA2WlIrJ)ky#j(LWf2i(IeW5x+^@HjJ)dQ*n^u5z}P2Y18SO}g1i@;*A z1S|zl12&mo(F5dTCfhR2PxnMkP0?{jo?L)1~!3o@Dg|# zYzAAvRyax7z1K@RV5M+TvARD{^4udyA4mbjG z!BOxQI0lY`x4{W;66Aq*z$tJVoB?OSIdC3Or~Gl48_*WC1MR^BpabX#{6Qzs8FT?% z!GoY1cnH{m1_D4J2m-;t0YZQigo5s%2j~eN2E9OU-~xR>U(gTq2akXOU?2zsgTP=g z1Pld_f?;4d2nUaW2rvSS1fxJCcpOB5(O?XC0*nRYz<4kLM1zSS2228z!4xnROarlC zI(QPy0C8X@m<48ocrXVffVp5Em=6-c0+0k2f~UYDuox@>OTp8?4VHoB;2E$2JPTHW z=Rh)81y+OS!5Xj@tOM&o3U~pef(>9JcoC$5O&}e-1YQQ4!4|L;Yy;at26zSR06W1h zup8_FdqF0673>4Af&JhBcpV%BS>O=J25*4F;7yPNj(}Wn6ubqFf#cw9Z~~kJdEgyz z3Y-RKz*%q(oCijz!hd+NtFo)HtFvpcYqGuBwb-@Ub=Y;;_1N{<_pyE04cNZyhU`Y{ z#_T5Srffg<{p@D!=Ij>imh4vS)@&QQ4ZAJ79lJgI0d@y=N47t^6T36J3%e`(L3TIx zLu@-+V+XJU*+J}Jwu2qQcCtg+-Pt|ZJ=qVld$D`7UF<&WzU+SN{_IEC1K0!EVeCQd z!R#UIq3lQ5!`Q>w;q1rQ5$qA{k?c|INcQ9GDE4Uf81@tFvFvf|@$3ogX!b;Q40{rL zGJ6VpDtj6`mOY*QBzp!sjy;n-i#?ki&z{3hV9#aGW6x(NvKO$E*bCWDu@|uyvzM@! 
zvY%$V*~{3=+0U?7u%BhGWIxAFX0KweW#@^1(V86oN!QRQ<#oo={!`{ozWWUPZ$9|2ypM8M+I{P3yi+zZl z&3=P@nEfU@hkb;d%Rb6}i+zlJoc%WY1p6dAkNpn&6#F#$4Erqm9Q!=mP!;~ei(Qpn zja{8xgI$yD&923+&91|)%dW?+&%TfC!*0O#WjACuVmD?tVK-&_vF~R$V>f5FV7Fwq zVz*}7*lpNt+3ncv*$=QgusgE-*`3&(*wV~=M~U`MkjvSZkj*pt~)*i+fl*s<*C>?heX*m3Nc z>{;yD?0EJZb^?1Ydmei}JCVJBoy1}>WM?8EFg**WYZ>|FLy_FL>@ i?BndW*(ca1*?H`D*r(X1*=N{i+2`2j*?%I?pZ^9+nRS2w literal 0 HcmV?d00001 diff --git a/core/src/test/resources/test.feather b/core/src/test/resources/test.feather new file mode 100644 index 0000000000000000000000000000000000000000..4a348d1e2db9287678c524b5a854c2c5710263f7 GIT binary patch literal 56538 zcmeI5d0bW1-v5_~iercqIUa{{%+6*;O^-v0LrSQK<^U)PiGYZRIdn8LGzUUMawLZ& z(?l}FG$rTCTS#7_u6D^=X6E&xm2tD-_xWzVyNA8@ea?FRc%I+${GK1{^?Lhex%WDI zopsh;`|Q1jGcYV{%mht{-w|Tnh42&<8i+=suBanC@E1ZYcf@4w@K^(h~1`Tro% z5TwM-i3#wVmJpSYC@vtsLev%x{C`HZG~E!V*P%dp<}`ESrY9%T|9R-|tm>Tz;ijiA zu9n`0edP4MKGWvJ#mC1{`ZKieum6_E>iajrb~+yRpBFPdF=56$A;R^13gK6k#)ICM zH!zM-7k|%)O2PX=Ibom4iv4_jgxJTgnO?PT?SJ{2iSwi5V}vNx_jMD8tL$US^_}1OIoc+WyTEH{K7bxuCa^!~5D0_tyWerN8&; zuT5pl*%!sUKfQ;LYWq`qHMFcGBb#G6=$%YC(eATEA{j< z%#+)UT6$U=_BY-K9Vcqu^td=q?2awQHoVO5A&5|!QfMSv;uWdIX{5iZsO_%5x6r{= z+o?_R#ujRQC{GvBrchpI7@Ogq)&7?M+=%vuLN>dEJ`{u`Dzs|Bh#UKR)UVo@N);Hv z*`mtEzeckg-*+HvV99 zJcHk7MpyH8BW~mT=JhadH{!PjIrR_%CIDXP2=GQmk#_==k=Iut@NN+=X zS3MYUBfZ1F`c@%~xRJg%z`Wgv8|f>8&D)K*k>1(YyxoW!+dUsMZ#Uw`>)RdX?MB?# zPFv|5Fv{E5yC!awM3q(LL3`2lg4#K%(m`M-7zd(35?Bh>fGuDjI0~p9e+sUHAAvj0 zn`WRB(0~Js0aL*ouo%1uHiOqeAvh08!PnpiP#bNN7w8D=U;r2mrhs^`2&@LLfLw3{ zoCALbUxDv|8_v(Bfa=Mkpg(v5Oa`;TGhh|S275sPI14U=zk#~|U)zKy=m2_y$G|A? 
z6nGje1nFQCcn!P_&VWzAHE;)@Nfr-)_TUlF4~ztpKpaQ|FMuqt2OI{cfziH>i_PEve~&2EZmrkIZ@=4*=inPUE?Sb!-OXo>}yV!@`ECS}H)8_k>@&72?2oFUDe zBh8#8&73FAoGHzmD<5;Ne9XD>G3Uz1oGTx5u6)e7@-gSi$DAu4bFO^Mx$-sV%GaDL zUvsW}&AIY5=gQZdD_?W2e9gJ?HRsCLoGU+buKdio@-yek&zvhibFTc%x$-mT%FmoD zKXb19%(?P6=gQxlD}QsY{LQ)YH|NUVoGX8GuKdlp@;B$o-<+!ebFKo+xe74nD!`np z0CTPa%()6M=PJOQs{nJZ0?fGzH0LVNoU1@{t^&=u3N+^`(44D4bFKo-xe7GrD$ty( zAakyQ%()6O=PJmYs~~f(g3P%JGUqDDoU0&nu7b?D3O45|*qp0ibFPBTxe7MtD%hN> zU~{g5&AAFT=PJ0`_oWWFJoE=sz+ynx)px*Wpc1sgwJ-pLgH(_O3P35i2U_Af7XZRR zD#!u_pcLE#EpZJC0O24NWPt)u3hsfHxIP7daF7bJKmjNP_drWrhXO!2NCjD-0F;7z zpk-Zb2jL(UWPt)u3hsfHxHkv@;UE=cfdWtp?tzxL84Cd6AQfbR0#FL>ftI*;2ms+A z6=Z<|PzvsWmbj4#0O24NWPt)u3hsfHxPb});UE=cfdWtp?tzxL#|Qx7z_>pt-dX(i zrDd0v7oRB3zw~19QCsoJOU~lL;^X*lr_Jtqv*voUkL%68t~dL+-t6ysbAap3fvz_P zx!xS?db6e~HbuE#*`KYvB}`76rLUr~Pkit_VU zl%Kz%{QMQ==dUO~e?|EPD9SHDQGNl6@(WOuUx1?g0u<#JpeVlpMfn9N$}d1seu0Ye z3sjU}prZT&73CMGD8E2O`2{MQ4`xTRm$7r$C)r=H ze`UAD{UwFI>`Cl}YAsl4pV>dK zo8dl~f)D#ib`pC%`%U(r*tggXai2_~2YWa>mi+?zHTGHdSM1ujkEYOxJ&+yAewMwR z{Vw|o`#!rh?zbrfvm@9G*mQ4B;cfP1_V;X~KhX3HdTeas{P-ERK{2*b)Ez55AD6e9 zpZLgI%@2U&t>))J@>cWXA$hC$X_36u{P0NLYJQd^Z#6$!lDC?lf5}^==l7P)u1iaL zzHdqGy1b<4|CZFQOHAq?u%vcfW>WuwCAI5Pllm9r)KY(gWwYx_CiO>HQoF8ZQh$Xd zwd;x|^=DX8yRK?de}|k}>i@88c3sz`{t-)R*L6+mKe42CUDu@k6-#Q@bxrDjv7~lg z*QEX#IknVZW7+Jwu1Wnlmej86n$+K8N$t9>N&P{V)UNBA)L$g0mimt@n_bs6sej3m z+I3x%`kySRUDq|Kf69{DbzPJCuPmuu*EOksOHM8IcUd;Ou4__%m?gFAx+e9PSyH>M zYf^ujCAI6iCiS<;sipom%VyVgP3oVsq;_4`r2acgYS(p5>ff`Zc3sz`{y$4<*L6+m zACyx|{e_mzuIrlApJ++#x~@t6jh57|>zdRbX-Vz6u1Wosa%!pn(z4lgU6cAZEva4C zHL3s8lG=4$lln(3sa@AKssGfHn)_47R}1?oGTq^$Ku|7&kRGdGX4O&AZ<$Jaw_O{N@k8d3d#b*xqNz=!od}ROjlA+jH}e zpDX_StJ{@sjau4z`1KD7n>;h=nHB4@ckVAJ`rz^xRd*~mY8TflY?$*vdb6Zf5p5hD zx^?#KTG^w#_XVH4fb2fb{;`8XhX#$XjrItu7+*HAXi9EWMod~<0ft4ij@0KOsgZj}?!`P86O~r-?gb zV?@dD+2XAMbHzsg1!8{BB_h1zb0VlGmutgkB~o^K?A7kP=+bG^m=neD}u$z8;|VLin55W9G`e}IVe z>n8^G7$iE`LPYJBqpRu`{kwWf+T+vlbf)>Siq$mAZM%0|ad$lB=$86ri?yig`JYUD 
zssc|N`e`LOV{zAiZrzCM)Jqr_v=>;8Or-?>ZB!#!i~TLtA9HhbH>*Dr$~z{s=c z-2?Yx2(VjG+ZP&mVPs#smoLs~hFkh4Z(BFD!pO7@sn>q? z#)ySGx4TYljgfz&XU%!9jSyk`|C-;Woe)>*M^wIm$5%6F6+iYJhBj2333+aGM~t-V z^xlO-oiH-c8Qi2r7a`VNJv27$A&k84+vAnLJS@b{?44iq?}jH<_gZ(^*&RLfV>iUr z>4}kphZ-J??IpwqO;-HyAx6QDi~l9~(MN^&;{B9oGcoElw(H-|{D_$scUQzTnCOcU z6xXhfIqoM!T}SxK9WZJ&W7|icFAo%=_5Go?>p?>7jyM@Jybnh56$HMqzb{4}Hp{Dc z0HaR_&FvO255q7new4GQbO1&J^m_2U;DJJvuX?HOwm}%__f7p#VhBbi4q5)v^v5v@ za_7&b=Z4bA@_B80gb0y2<-4f0!!dI5*z~;>BQO%s`^v4csG#=LaXv?$z=)1dE-q+2 zMu;KW0^_!e*6;GeW5A8=JiguZ6h;y*a`wJI86zLdD(6j|iYG$>bC13^4I}e6eEVCM zD2&>?6E!3~8Y81ef4=Iw=@@CaKkVbtF&G(H|HJ0_GlfW)HF#2sSd1_^b8JUioDj=9 z`F;JDr!j)uxxdGNc#M*`+9he{9E_aoyY1~dnDenOyT$$3B+S6Lx7KlSt`H~2)?N81 zW@dyOTKZvT3Pvh6`N{L%e2lD&PYj>908bL%|7OeaG>jPS8u?|1XN359McKp8Ey9e0 zYoYPiG0dfj0+u?mQgf$O~!2wd--KqSCG;JWVwH0{F)1g`rOD(Ewz;L!gSo@^W2 z&UUav*^%rdwv(N~&SB@Vi`d2Na&`rq>H`H&wvBCPJJ_M@NOlt2$|%B~ zyMj&iP(MD~#`-)@hq5EtNo*%OgPp_9V;8ZD+2!mCHl6SK@!2-E zo$X+UvLo3^Y$rQ|ox{#!7qN@k;y&N3-AkQ?T-0d zARMHCOpphtkNg&BPzQI9U?`XYmV+E{0$c$<1Mj-{CJx4dWRL;g1Q)?g;89PAhrtjK z4V+**I1WAsm7t{u?ixTCp#JK0U_bZ(+yHg!W1ImP1R}vwuob)m$^eb=Zr%WQI3N@x zfHhzrxB#vL_lCHG01hw}EC!pwAK}8%&HCRNzxttgQLu|Y)@{2VhvGy=7P_OzP(YR2 zb*C)|MiJo!@6j|aywUjyC=Pr8-+SO~)9pQNq$kD8uYRpNce1l7#gqQh+KU`C>22NOvoXymeqq8*-5I4Tno~Ste1{ff z+vTIWZ-k9&N%6c(w{@q5u4+Yb$H$$$$sVIm=q|n(@gT*sM}Dt6@`JUlDc)^(w>IR8 z^QUwd4xQ4L;?A=_=?)o^iH8Gt|EGJ=`60>%p4XjI6xD&^u>*e5Z9kD^qj+UMts}YU z*oV5)`^@Y_@zA59Gub<6i|+CQ|1K2I^)J?)bol9qC?4cn=V7wQ-=X`0y-!z)W*oe% zJEC`DH;UU1)bCEd(Q~)%ynX$9P&}>sXSyA^^LkR;qid61c|_T0Kyqj*{CRzBpMR}Sfp^&aI*ar>t0x+_~O z^P_mt#y0-s^kxORLo-7IDDK_#JKg2$pAV#XZWCJ&Ice=Xx`P^x52mNT^@QpO)eEW%R1fI0r_b94=<}t|mOfAV4C(Wu&x}4N`i$svq0fRo z2P%6iZz^jlUn)~7PbxzyH!3qKCn_5%7b**S|McGJ{ZhO980Zh^ON;{y1cSg}Fa$gf zhJs-r1Pliwz(_C(JOM_7F(4E?3BtfwFb<3d;a~!Y029F^@D!K~_zNC=xf78f3PgkH zfI=u1v<|t(78r|&Nm8l&QYNAjsl%~6zKe;K<6L@Iu9w( zxky28n_<0ehV?cX*4tv(1}Jo_l761jpPe~Q=PLy|XDQHmOM%W^3UvNbpmUf4oyQdD zT&6(hGX*-QDbRUMfzE9TbbeEybDRR5=M?B%r$FaB1v=*`(0NaR&V34W{!^effC9Ax 
z6sRqrKyFh{31_}=S->No({v5>d!E9~#Et9D1!=nxH{Mr<(I9eqBW|n@V>22^-fqNg{C*uqLutG} zjypKN`qMbxMl#~Y@k5LTllL*=#_`LHhLpD(apU-vMuW=Rjkt0AAfw^r?MB>49}&YF zSVr7PpFP{W-H03M3lq%SjkuA%GR3^zh#Tp>)6CnAxUoHGk$JljH(oz&nR&YrH@4GO z`mSP>x3PCk+$f1EtIC7+qVI_Gy^X$01p)fbLEi(YiY0-iU=7#;_JN~-UiVXQ9sCGt z*5~=pTc76|zWm<>ZfI+~Kque>27xdT4d~g~a*zpjf_!ialz?mCE^xz5gBR!oe8327xdT4U)lfkO_8zd~gbsfNS6`aBG0{pc9~Hb%Q_{hz7}EImiS%K|VMIO29R6 z7q~ShnmISpe2SX&lBzFzRkL1F_0o{Wzf0rYYt~DuURKih zZE3uA&3Z}I%Ssx5ER8q*?^`d))S_#4sZF;_?Wo<{u4{I4%dXkYZM$YSx9*zV+`eme za|^H8&27A9ms)vCGcV;&Zs_HfUYZ)G$!p_DlixIXbv$W$nawHEA-KCaspINz=VFX}vs6n$V?5 zE9Pm^lr2qKGf$HyWognZEKOQAPm|_ZY0|=ZnlzJ2la|iYq`6X>w0NE-T@5tpa-d1q z15KLXq)992Y0?xXO z(xm0|G->{iCM~F^NmmF>xCQYr;q_y=l zY4VIF&6d%m<@GdaK8z+Uu%}5gUNmWmJx!X^qDhPFY0_0glP(*YbluRTi71-1(w-(w zHPNKC_B3fSi6*VKr%BUAwCZasP~QeU=ci}hivisyzXLu4m7r$5J@ljCel8rOf-Fz~ zO2IwQ68CBWARMHEEKmSS!9CCt_hJDc9HfFQPykB7Js_{Qw+Z)ch2S&rGiZa(uzp|) zSPFK4B0vKo-0<+DGZ+G5z)G+eTmavK26)cd6O00Lz&dac6oWg!%M*P^U>ukavcM5g z2L1_Jqob`am<*PH?cgLR2et5wv=bN%W`O6xYv4S%0qUbut_K(i;=x+*CioasfTnl= z<^#rp6!0=A0H12RpJE{w#X2sEWn2`ixF{BJQLN#j zSi(iIf{SVa7uTg$v;Lu?d=%>k)vSN$y8LR^KXhF_HR~U`E}xq94_%i}&H9I~%co}j zL)Ya~v;Lv$@~K(>&~^Fz?_dAW_4~&|U)2tZI}gpSxc9(wV$~rO_aAtYta=5-JqVsP zt6o8IAA+aRidRtIil|Cbxfr{0HFo84?8^1nl?$>fS7cW%$*x+H9S_nKMXg$u9go^o zub`@vs)gC{bY67`Rh?9=&5j59sza#iq-uF~^Z_Uip<19Fy#%UPP@UbXHQLd!pgM%= z>{i{Tpf5sk2-QOE=&ew_f~roc)@nxwhw2cjI;mQ&9epCIL#XPcYQ=VRqbLrcTC*J; zEvi>g)k)Q|?dY3P9YR$nRV%lnJ4ba0Rh?8V-i{t1#UWJ7x1*0p^$Mywsk(zfx031* zsyeA!#2r0RibJTDaYtX3>J?OVQnivhy0=t^P}ND*V(#b>QyoH8CspgYqZ3VW2-S-2 z=w?&Bf~roc7IjC@oazv&I;mRM9i4fqL#XPcYH4@$|0xcky179Qq3RV>byBsyJ319r zhfvi?)e`Tfen|A{{4w9kn1^J{&q=O1}HuY2M>DgiIWOgPypIySf%l1NlwVs|G#!hBuvh&#`?7M6) z^k?ho*I8ro7cWCR5&O9-%33l?H5DHoGn@X*j1Pwd?Yd276jkyDl+lD5xd1>oSuDids^; zE;VTwshnCGRchJnx{^s_PA#cjS2JluswK7SiYAR$wWM}k)uhp^a%yRCt7Wt6x+V>M zwWM}k*Q9~5mej86nlxBY)-8(m-I#X4iF18YXN>?Ygc>gNQAuUDq{fh_NNL>$)ZlK(?fI zUDu@H$#QCGw6bNh>$)b5WwxYtUDu?M&X&}!>zXtU+LGFJU6V#h%c-S7)0WMy>zXvA 
z+LGFJU6Te}TT;8OYtnFROKR73O&Y9iN$t9>Nkh5i)Y8aq%VyVgO&a%YN$t9>Nu$Co zsa@AKY0S7Kwd=YjjVPB>O9RX;n_bs6Y52J%wd=Yj4NkYDc3sz`q3f2^uIrjKu-%f{ zbzPI^P07hMX&#kzyX)#E&$F_ocU|G+d05u;uB)6pPs^I#b)}Q%aaqzUZxdYCJ56u+%U-V(5&ed^-ponAkSN~zP{^oSDx2q zP4D{bmFK-#(<{zj#jS%pPtNlCikk;{9-TG4qW($q?5yb(^-r3IXHBoDf6_cXOM1mk zgglSWnqE==6!#JGyg%#fE9###FVLD^QU9cQgVywl`lq;^kmng%USDxTAYw7?LY_BieSJm!ljc=g(<|zqH1E=y zUQz!Pw;1v~P0Q;mZZhO~oYwS;`X|lvw5C_oKWQGQHNB$#N%KT4=@mB{@;p*&dPV(H z+;7P9POY!6sDIMDRBL)g{gdXcTGK1)pW?Pdp66yKgxw(^n_0i^z0jb=&N@z4F!aJr`cjJCJ?I`F8Bl(07C0vz_&L zzv5z9ana@6vWzQf<&j^7+_2yBt`I-m_^J4Q;qN)M)9WQQjCjD&tXnJ3HkIwmJ6(7< zuSa%or%!A^XrG||wm}|4D~6XnQS@Z)_>75ZQzD~6V(f9=bHv;mDaC1pi*uamE0WS9 z);Kb{ZS>5p+)}>d!mhl%*>5-x#=aGLBw_*q0<$3*EXbwy{4 z3UI&RCBA;wTbz5Vz1aJD7m>cBhlqRGE=Ifo;BB0h>9Ee=ME6B(l? ziMc}}MOdGi!q*-znm?Q@er%H_N}DbfN9wH**}tw53%_44CVl&|==;|#qV1nw6~BJ4 zSA0=)K%6XiOXTc7Dwgd$DWbE_ioxqX6kS$a5_O*WR9sKID$d9JP2@iHtyuNsKg83+ zeiS1gyDxhA))I}o*AutfHx?f^Zzl2@JSbkm^2^Ehx`?s2dI&!(8{eWVK-~LNKk@15 zL89igmOi`}Lr48TfUPDN~moc?rK*aIExn>kU}x!Fx>or5mv#XX^RUU%rgh z2|pRIamXgDM4=Uawl^CqEv&P5Xt-I3$XmHH60qdR&EXyQUD}G3AojlU55MjB)faa! z|Lh(3p@-BOcm=ALuBXoss?;R*DGt>d)Wp z72?c>$!*5Gj+MjjT#n4&hm|ZwKk-_N{a7_||Jm=-4&aww>i5^Kyor@KW)-9k$iw-6 zw)gR!hj9LP+FPscTUeFV*)QZ)3&wzMdtI9>EHW+3Bq|6k;Wgdt%Bx zEFpSs?82QBk6}gfLwCM8ejLB<(&Xt!Y$vc1M*Owp=T72RY2Keyc)bWeoYD1@Ux%N< zN*&LS9DLw3R-(9eI^FXu7O-&i`)J-d{NTm5!_7WDkCig+_nO$}ef%;`#GdWjKER4B z1?|2P7qOB@v#jnj{)7=#eY4lCC$nmKFC-?5TO z*%$lsZ{q?KFyU^C3j9*ehK~am-oa`pcZMywau+L!j4nDg;0OE!*Zv@nT|eUYMC%`V zqV7*v>10;-4RMuN$>Z#+Wf%X6l`cEAZg0Pjm0q2h(>MHrl}4^Mc>Uh5SP{SPbGIk{ zjumUN@B6-sk8{Yqc?)fBSm|TztrO2-HROmxGu*#%$Eui3t_~kj7b}s(k6(YF9#&p? z|4)}a@%_lzb!h992KWO0{E2CwVl|eH*8=wRX^fGmj)OP1HNg*wZ0l;T<%y9~_jk;n z(G6sQ#5@>? 
z6c~v4Fc7IQ5DQ>HKPT%1Vj&E~GcXW~U_i^6I)PXM1NtctClJrVHMlGTlv2&_S`VaB zKp0ddPqvM1XFJ%T>_~PJ+sV#g=dknGMeJgBIlF?b{}d;(;5oP|A2zm~?O=zpBiTu8 zCp&|k!_H$Dv5VQ|>~eMm+xR&_ z&&T=wv+ZmLJCq&CPGURR8SET(9=nKL%r0kFu*J}-<9f1fY&+Y*4rNEOlh{sn20MqH z$1Y+Qv&-2PY%z?F&$hAcYzI4(9m!5&JJ}iR9CjYNh+WJsXIHRA2p^wqW82vdb|^cN zoy2ysGuS!oJa!Sgm|f1UV2j~=e722kXFJ%T>_~PJ+sV#g=dknGMeJgBIlF=_M)2|3 zHnyGZV283J*-30CJA<9W&SMv`i`nJu3bq)@$7kEvcD92Z%8q0wv7PJ;b`Cp_UBoVC zm$NI_#?Nwkj;hLsjcsQ;*rDu5-R-e+KR|Q%=^~v44ucYa2f;PJ`|k07?|0t|8&<71 z{sZjQ2v5zyXb=xpgIsV9do7HH5My*ywjm;si99B=|$0Y3xp z7Ffd_j04Fa1H1_?f}6mjC3>X55D*QVU^_StJ_nVcWh?wXF9-vFgbPd22ZoUGt6wd0 z#Nv;2+wRA?QT)b2_uAyV%I&(-76iLf+;Q&{-5&E3>QKD+dyl&0?Brd#Bkw#`kK)~u z%5+!UPDXV@cg&o|^~ug#dv%A*9@K#1o;R=PE{k1&DuM#|rYWidKA4&Nb=$ulijN&{ zWI8Q7j{?57h(2bK9@P>bCAz`N^K^Sp8;Oq%AK=xmb>~iY;$y-O@%^PWJ_>vglit=X zJ{yCgNB7o*o4PYfSD={j{>OJfaiRiVKC1gh*fBZ;xxU)C?zsKkk zx{EJHJV^2Ek>Bf%{9tWsigz2{tqr;2{3+dqL#MQ*xby5!x0Z=ciL!y` zb>|dCb)b0cfM0amPh{CBUfECUNG>||q3-lPGdoc{^k~&@Kzj#m=}htR0{<@LT>oO- zNr#_)h~h!Mbsi>*{2jV4*!y&)XvV?Ix+8igcB8oMK>hCI8$EaH&fC|&2gTF6f2P}! 
zJFh3jJ-RmOMK0cxt2?{P;72GPx$CO#Zk^J4Q@kR_>rrxHhXcBu+lSdH9?}j2DiA!k zF4o9pty}q!b6z>5JJx%YFU9SfuIsLBvCNO+MH}1rlhd0O=nl;c4WPJp)9-YbuYW#} z;<-(1LFA;h@8}L{G(MQ(;>CaHzEFR4ABtwAcj-%xsCQDgZRJF|c8DA9Kj_YTZrx)P zPjlFx;IF#aflfTfN_LsCLDejomER5{&*#X_f6GFyPJiGLo?#S^=#!_ zr$$rU7W7ZuHwrdPr+A)!?-}H@!xwZre5c1y+#~;2-Np9onH0}H=rfBP+51nryB(Mn zOYw@HZgJ$oeOq-qy9Yi^@sQk0x;?wb&!%|Up1SemoG!2Gj@{LF4#n-AN_AJ}BqdP1 zs6)df|jt>@3Bcj=TR6K3+P-K55fVR4|EPty{9@)^_=Q9 z)oZHDRDUBu6o>}X0o6&Whg1it-cg;SdPa4N>J`-`sy|eBsJ>7gq545}g6aX)0s7qO zv!>5E4J-uDfJI<2SOS)UXMqze1Ixj4UZunlYnJ3tP273>7Nz;3Vyyax7yT<|*B2i^et!2$3lI0*8< zA&?K=0*Ardpa2{Jh2SW72OI;(!MorDI0=ftd*Bo}4bFhG;2by)DAWROpf+#^bwFKE z4|st3paEzI8iB^333vc_f~LR=Gy}~+3(yj@0^Z<3&>FM>Z9zNG9&`XU&=GV3ok17y z5O^4L1>Hb*&;#@Yy}%=&H+U4-fd+hlFYp8YAOHk{AP@}tfWDv~cntIh1Aqez1cSg} zFa$gfhJs-r1Pliwz(_C(JOM_7F(4E?3BtfwFb<3d;a~!Y029F^@D!K~rhutn8i)i@ zAR0^uGe8WO31)#<5C@(Hvq3zV0}?#TwGQdk96Knt*!OI{EYy#Qf6|fm>0b9W~ zupR6GIp9^W6YK)J!5;7$*b8#O>tG*v1MCL}z?Bqu?EI z3>*jVf)n5*C<5<+Q{Xf>1I~hT;5;xwE&jueU7PLBuEVa&uE+LZ*Jn3iH)J_53*ac+pycR+p*iTJFso+j_gkC&g?Gihu9CZyRy5n zyR&<+d$N16A7S@qKgzbVHMS4im+i;)X9utY*+J}Jb{}?Mc0cxG?EdTlYzKQFdk}jt zdkFh+_E7dPb_jbodjxwVdldT#_GtDPb}0Kvb{KmsdmMW_JDfd%9l@T+p2U8NJ()d) zJ(WF;9m$SjN3*B1XRu?~GugA)vFteZ)9l&oc=jB20y~kN#GcDeX3t}%u;;T=*$dcd z?1k)S*o)YU*-O|<+0U|_>}Blb?C01k*w3?9vR`1QvsbZKvtMMdVXtMcW3OjtuwPvR`NKW52=P z&pyC@lYNk#$3Dc)XTQZh%zm3)z&^q*WFKX}!#>77&VHADf_;)*#D0%`ihY`WhJBWO zj(whOs22a>#;(nFXV+oZW!GbSu<8Jc*=^Wu+3ncv*&Wz6c1Lz6c4u}M_CxH4*Z)~WKUv0#h%Qb!k)^W#*So1v7_13*)!NN?3wIY z>{xaj`)T%Uc079yJAs|ZPGZkxC$s0VQ`qy_sq6*pH1ow)7h)otJyEI*Ra>J*Rj{LGuSV&Gua#18`&?jv)G&1+3Z)?o7r2~ zTiM&#+u1wVIqX;2JK4L~yV-l#ud(;CbJ?%6_p#q#?`I!izsWwx&SM{9=d<5pA7;PJ zE?^&F7qXAC-(eqPA7{VIKEXc8E@HpOKE*!GKEpoCKF2=K{vCn-^aPD?cU>2E`;*+a0)pH2MP})bQY{xc8_k8~OS5buW_a}?&&}* z=mjcqkPBcVhyXTlqZaPVKpF@E-ryEqjmp*u`T@$$ z;5RKHDj`u^(BE@y;lQ?xYH7M5&TmV9|D0w{-1KDqbf=P`@}YM^rQyK8#nsZ=5SP>Y z`b?V>7ay-TFK1}qU;izS)%S0Lv~)b|KQCr_V!{kGAmRMk_*JFxp!eln?HG0O_l&3% 
zyf2g!_L;2M&({a{x%`^xRr}Wdm#>*PKRO-_W~sifn>bu;-|FRB2gjcopBR;*mr7my zOQDwds%n2XdhIU2C=-g$PnZ`sDN{s%#-1YZGpZW1> z>8gJY^{~x&J1KgZiCPZ)->quy|GSp{-mAYhl`&^u6!ZS{ z9zv?^Pw9=`XbWRH&k`y29;ONZ@ZIy6R!dVFy zC0vzoQ^H*d4}^thtyeaPMO#PTy67n3q=d5)E=ss6;iiPU5*`TO?do0T3TLHZmT*xT zW(ilNVU}=H8fFQ1rD2xvP#R_lPo?oYg_WnLjqfT;aSW`-9aO?e31=l-lyFtTO$m1; zJP?5g)LYFJPD;Zp;jA>w5-v)^Ea9p&%o1)&!z|&hG|UnnO2aJSsWg74u<-QMiSD|E zZcn9Fxee-C*0QNxr*1u+?K5`u8$4^@uu)@&=bnF|NmG5Z<}bEr+3Ka%Z5-P=IlH*JxqEoF zYv1AJS2}tbygT`H_Vx1*2z<3m*Pw2}-Fx)x)w@sMkkGK#UVo!s{{i6yFQKR2{Ys}lmchbjZygTl_%<&W6pEzmql&lY? zPMbbs=7-rI&6+)D?!5Uq3vw4OTD&B0>9XY?uUNTi^_sQo)^GS^)Gq9>${4jO?|uS;VF@EaXnL$;}T+YTH8Vk+e_ZIhwAC)*yyc( zSBuqeMbH(URuDEEL^v-Vs+(@}APC>`$ZX8>1-S*E9DC*y2#I32{WL`72~E&Z!bu5dC0vwnRl-dP zcO^U!6Jj;I%Ut2CG|Un%O2aJSsx-_JZc4)};jT2y5*|v!Ea9m%ey1QRKR02iwKz_0 z${kd~NeO2qT$FHC!c7TxB|H$5lQq26T;ZfN%o5H@!z|&VG|UpNO2aJSrZmhF?n=Wf z;h{9l5}r!qcZy$)$_~FB83@yjfBPLzNZj7lvMg>_8HOua zgo_fcO1LTEu7n4|!n1L@lSa(x494}tzs(lTO2aJSqBP7Bu1do!;ifdq67EXFEa9Ov z%o3hT<97-IyTiC30=w#W&FGrjs?NhK@w7Al{XM!_{kZAPzq21V_4mIT*c<<5U?8qH z`pq{yAz{79yDY3%k5@6Xd|Y2X%yzj>f9qxazL!*7f9xi$AgYTlOxMUXP!iKX$xfrO z(}=A!Hyw%ZFWFZ@G9v0dStC+ef1S;Y}EaUKY%PMX(As z!!9@iC*cfSf?~J}R_HNOWy$S;K2U-n1R@|FhJz8E2{0Yz!BSWU+h89Qz}IjN zuEI^wqGt`Z-~cVa1zrJv=mD?8KuCg7kb&N0$c9{40UIG74#5fd1}?&NxC55x*+2sz zrb}z^0B`65eP93#f>am-ndnW0Ij{uQz^AYWjzS@vg)49aFpecm-IX7$+gYd&va?W4 zgDI9f4-QTCZl9Ltd)8eC` zrPBxP9kiw2n(TPrkN=*qc}~Lq%Ri6F&q?xc)nLkjMac)_+s!Xnni{w=Y<1y^;fH@1 zwd2g1G;1p>%Z&7IjsDGTCyx70Gu~f6eZs%e55~I9nS7yAweh}-rhcChxUA2LnU^cS ze{kT2SwDwcaX^zm0$Hr}o=vew@W(jW1)|BWWA!0Rn^`PRBa)(=MMIhMtZRh!C1WI+{?&DBM6YH>4}*Gzf_ zqkR@fVIbf91q`B@$W1Kn4AyV}PhEo-WnP`Fz70G`? 
zWNElaRy0l=hv5vtScl>u!?@QYDf}(U=av^>shhE)96US|?@PinLa?_->OH3qgGquzAyo^k8Fd$Bw2-3#7*@1Dp0d-og;}<&Yq!G^O3*{4%Hdm(pRjGds>R+4s>!^Qy>fez1KS%xXhBM8n zKT%{29-fW&8S#u*qE@gM^~c*C@I))Tq^KM9?@9gpQvcVee}C#9N&RD}|6uB$Nc~f& z{|M@jrxsxRo71U3(I=D0kc8KU;89*PssAkMPn5X1fch_{{>!NUO6tFs`hPI3q=9czbp0c zPW`ckENmeSTNr>X_+tx>*n%ClAlSl1qDVm+^(Xq~U<;YpLK3zRf-QJq3(c_w8*Jg` zZ0bLs`Y)vZ*g`h8V8j+;v4vo4!2?@xz!t2qg(9M8ArUU0=(7}C$if!Vu!RBGfnaWz_kv96Z$aJ85WD?P6iW^k=_Q%Sy>vDBj z_-$>;`05#n{=A|1?tVd?%RYJ*7=-sMz%D zFV}iT%khB0vcBU`neIA5hP8Z4`aCyQ+@BdQb~UER&)OOC_03tb^=gjHIk!aK`g(=* zD_AET_iYx3ZQDh&ZjYSJJ0!d37RaJ$C*|Gor)1#UXT@*WMQJ|xs#FaxmP>tZ$zeQx zg}0?l^01bK)^(&?6FYHi&`4_AG?D9-TgaEU+sKA%t}^37dl`P(Abr2|6OV&krT&hd za%)4V{9}24*_IP6b7u~gF%ySM|MU^!mG+iAmoQeeQRC&i&?&N~+YDLUdA5vezd&Nz z=81o^mGWZ4^-`_w7P)M-Lyp|tBP)v!$>d7~l6dB%1fM)5ZI7IlI=e2)e>Po}6Dy15 zlZCfr=0^mbDV7rQuC;iMt|JYG+R5#KjpWqpP2{s4Eo7d*qrCl!n+$O2Achv+@;o7} zl5LQjtKLiYYQkj6PyOY+AEG7pyTKCh_o33_ixE=MKTq0m%2Hl9ZY>rE>d3!7vy*-68%f@>CXzY7g~ZKpl)(4hq~$vu z#Cn9c{1ERiMzjm7}-=ML$ZGvFC+gm zMZ&(HA?^P;TkQX~Kz=!tC*S0+lpPz_OU{Zd@=op!iOAk7ohBcaCK<=1@~E%meBx=@ zAN?Is{d*bT{ogXk??-v{<=fKAStB)GtRhz%)sQdh)s;2X>dObxSdwovk)Bsth|5_= z(G|MMjn6yCSG&Ds^A>;kXmyZ`THH%sn;j+{J{TYk-y0}*-;9@kCMQdNY?>_SH%8KX zWk_V;1o838lBRAm#j@2Lx$s%La6;Uk9`w`W7D+mA8K58Ip~w z7On1dqO(nlSY*f}_15OwzgMk}$i#*-jtrp#wjEug&arxQj)m{`bs_z4;dXbt$SiXmhs| zl8GZnb4H)+$cUp={hgi$l8^BpSk5KE7<8d(7d5mh_DTOBvNI!)+*>F2k#tx|ihT`?hxc;YL{jqUmq$i4=*H-*ZRgM43Kls!`7`ae zBq||ST>X7|ikuBtH7TzbqqRK!m(}`++_0E$m(Z7FC*qlY$3hsn=1*DZ5+*Xeq1E>v zlH4^KZESs;q1V#Fn<@SJF_Lu~x@9K`-?nMr-)$1ki1u2zPi6#3PSDrmFOvj@t?2PZ zP_)Ros)y^X8^{PZrc;44<542FG79x$M>9G+8ytV0q^qXa!L=9cb_8;-MfsOhtBRXcbv$Hqjj?-lM!^QiKipSlgN#?cwzqpM$#AF zjBPPdtZWTTkw8Fim8YUAL-ZrjtMp z-yiht43S-5cAW0>A*0Qz%U$7~$laScIqZ|iNj zYegnJpQo{0kN+DR`^9a*|4)B7@yI9me|wwbZ8qWm*_qEw+l>F;kT-5@!T)=X%njX& z|F6AvVcRzRzviHq9Jb^C@n@6A?!f;KhHXBdkN-FO@|S>J`2V}Fc3Qa`|3AAs(`GOJ z@73XG%0B#m?uNQw?Z^Lbw+d}{5dRNZJa6tH{C{J^b8-a#uRHU_sL%2Lp*0itAI1NV 
zj@!_p0RL}w@8?O!@c;26I$k@D|6llbMz0h2f9KdkoBoFX=bowc>{s}|g!Ud&i2uKF zY}U7b$N#qmobmYw{%^Od>C#j9|L}H$tAB(4f4Oc=!awoZQJA?mEUfACCEdGDx z*>|$P!~X-O@4I~t|If3w?tdQtul#QJT^I2Gh+k$l{T~0%A9gD968_)t>T_2v1W**x2u8wd#%1Swwe<6@5U@?S+AlV z+v>j4+3{D2+tDvo@GMvR1Pm<}eWvWV9s(2Ffa3GB+0xQ_WV zlITo^Y{&(|t!#vRI0Pqv5F-}>mlxbua7FWHB+;1)b6^Rqflpx%9ECzS3s>Ll-lB}`h@dVC79$1V=-k*l7F)Se^E+!#5s(;yhWL6E^LTmk;1sZ)(wnlwnQNGD4 zbiV(yvyd2h29A}9Gvn2ncw7?R5Q3$8VMWbzFv3L`ejd*DF%G^Oqaq2&ro%VVxyRCx z2h(YL>?ct>j01jw(-Q#-@y>iaXDL>lg~g?vCy~KZ{qZ74EZk06-z^ND4(k8LEF|?G zNBxNgNqA}qUgR~C`p=^NM24FSs6WZn0TQP*E2%#Tk&)ylmP99*Ue#kK^(SeuB0(u4 zIu#N*^6~1WcwE*g>VJm%pQHYGxFgCEHf%si@Kq$WY1q+oRCV7Xvq z{U8`Un0F);-LTOY?bYbVZ7B&wF9G=lhX1hufw2ok^% z)S^Wr$g3PdEyy#1bma(Yv7r&98%IzJDUBfAIf7awXawoW5!3=mBS>$KpceNTLHcq8 zwQ$i063P+OqD&*mYaBr>I5dL1!4cGAN+U>rjvxa#f`oGfwFuG(63G$N0#74IG)GX2 z7mXk>96>G2G=ju&1hweU2r`%>s0EcqkRcpFEfzI`Byt3`5Yh;0nUN9HB2Obo3P(^2 z7>yvqID%T7X#}+_%Lr=WsS#u(M^K9@jUb~rf?68;5#()-Z!@QOv661H zw%ndXl_4A7Hpz~*Drzi=yJ=5OvqG#ZVW8l(Qyk9d$(>D+wff$@pq zrC+dLx3)b}UBBnLMxS*_qrPH)P^CvISfT5X^yZhX{83-ASJqe*b*x;)3g@Y6B{nsz z!~q<^3xZ()B!LmKAP3e!J`}(iD5_y)A%+_2Ee_xaUJwieAPLlKS&##3ARh|g3=}DK z@5fW`^myt%kEh=G@zi}EPu=hF)cqe%J>c=w10PTQ)yGrs@~C=~U=`tc4CwMJ-yxQ~ zc)w3CQjrDsdGuM%pMaizMW)tXSFJQoSwI?Y#zOKR&cj@9bN{Y0RupoM;= z-6{-Rzw$LkBU-rws7_LK95Z199ET#Pso{Hd2!vP|2a8}AoPoPwuVsw_gurl^4(p%* zu7Y(X)&PJ%41^5Gg?#u1?mz?Lum|*kRG12D;3!;yYL%JN249GRbjX42a0+gLoh55O zpcf=V7Vygoas)1cRTb7&fDc4~5$3@*_!@44EzRr#Js=4t!wNVA7s0YB>p;L82EZ7Y z1D`@6+<~!3J=mp7;1*_l)Tmmaw?mzfI1Q=l+Y=f`iCfGj1 z{f8bD{BX>@uTo&JO$j@h&F(dgvnQa(E6EYs>Y%=yEj(?#-@|L#UK%Fp<3F9Ff8 z5lzEJGz}ZkG;BoEun|qeMl=l@(KKvC)36au!$vd>8__guMANVlO~Xbs4I9xkY(!(T zzmJBEXc{)6Y1oLSVI!J`jc6LSENIw>rePzRhK*<%Hlp#%3yB&wqLFlt(6A9r!$vg9 zPp5nm)65B!&ko>_sgysH@@G;0Jj!1{`HLxk8Rf5}{I!(-3FU90{Ld(VC*|*@{DYK# zgz{O}l7E8o3n~8;<&)?RI7j&xDgO%P7g2sO<^M$aw<(_u!Z}Q#WmeI$5lzcRG%Xv^ zv}{DvvJp+oMl>xO(X?zt)3Omw%SJRU8_~3EMANbnP0L0!EgR9aY(&$t5lzcRG%Xv^ zv}{DvvJp+oMl>xO(X?zt)3Omw%SJSk>Y-XTqG{QPre!0VmW^nXpF#PVls}R3vnZcL 
z+$NjyXH))s%3nzNd6fS#<*%mv^_0Jf^0!j{4$9w6`D_p3ht0NgWm{xp}{F_!WlE9l*W|D-P_P>)zoAz{*2b=b(*XP&(o&|eo2x?T> z7#!d^cphGWCeRf0&&<%p2JM@5_&D;9VF8??EPvhY9dLOoT}=8KyuMd;n8n8cc^7FcUt6Z1@Of!EBfV zb73CLha6Y{xv&rx!D3hfd9W0g!E*Q*R=`SF1*>5Vtc7*39yY)yun{)FX4nFs!dBP@ zpTTz60r{{KcEN7g1AAc~?1u_RcbLDwF{-S6RE#P+t2jn6eSY%c>2q=h4HMW}s00>J87!d+SV2{&2GzkDYCuh>1vXF{ z>Oftn2Rg8YXTT2XLj!mg?4cnvg2vzg&%yKX0yKf9poeDA9A1PL&=Oj~OVAqHfFra8 zCvXNAa0NGT2M_RscF-O=z{~IobObLjfH!mkALtCe;0OK?0D z3B8~<^ntz*0-+EFufgl^2K0meFaW|K0wN&_qG2G!KrF<;AQ%ksFa#1H5t3jiBtr_M z!Y~*PBOnb%!YCLGZ^BzJ2HplEyaVYl7Bb*n7zgh`CX9y(@IFk0NiZ3vKo)!eQ(+oR zhZ!&vK7?%e2xh@-m;-ZR9?XXvSOB@O5Ej8=SOR&l6qdnq_!w5eN>~M}VGXQ>b+8^b zz$dT~Ho<1t0-wTG*an}$cGv;=uoHH{ZrB5RYglzq5lTIjL#anak;>}+i}&Y-O>0iKB{DX8NVu-K z=&YM+ANtkhrqab;bwKL*a=+?X{^|0+3U~i`;qCW-e2t%9=FIe~P)!?kSy9)jw)#Nz zT*(izGd5*1BPS-+&OB#GQ*R6G?!%Lbe2k%sIuDLP%_o45GsG)3UvD;?h50h4+lBUl$otD%$$}c`OtXHUGh*R67 zQ0KtmB`?MBAqGEF`;F8zMfs=|5r_ehH9+j~^kc+b@E zzG*4prk8y>o31C=`M34e{V3BP71lRsc*Kw%qqy?ErS+qbUi?awseJ-U+D9psZ=X<3 zy$6Se^q_qxyps>__l#^au(Ul&s-oUhzCEa~!4T%>>loIn>qwM_J}56>(Eaj!eyh9) z=RR=}e!UHbpa@je^#vGGAJn(2sTKTHArDNE^3}6`VJQ(#zK#Y%sIot6@;{zt6Bpr}%`3A$}Z6gwblER!^Ha|9{q?^7qxZwq9-#^Gj$tL2Lw!dm`@n=uZ&erJ ztpbf9z=w}E`GcQHiQvYgTzf`x>Kzy1 Date: Wed, 27 May 2026 17:12:38 +0200 Subject: [PATCH 17/20] wip writing support for DataFrameIO interface and json, todo: tests --- .../kotlinx/dataframe/impl/api/parse.kt | 8 +- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 232 ++++++++++++++++-- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 18 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 170 ++++++++++++- ...etbrains.kotlinx.dataframe.io.DataFrameIO} | 0 5 files changed, 388 insertions(+), 40 deletions(-) rename dataframe-json/src/main/resources/META-INF/services/{org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource => org.jetbrains.kotlinx.dataframe.io.DataFrameIO} (100%) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 641811cbf3..6a01294d32 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -39,8 +39,6 @@ import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.parser -import org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.single import org.jetbrains.kotlinx.dataframe.api.singleOrNull import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.size @@ -57,7 +55,7 @@ import org.jetbrains.kotlinx.dataframe.impl.lazyMapOf import org.jetbrains.kotlinx.dataframe.impl.toResult import org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource import org.jetbrains.kotlinx.dataframe.io.isUrl -import org.jetbrains.kotlinx.dataframe.io.newSupportedFormats +import org.jetbrains.kotlinx.dataframe.io.dataframeReadSources import org.jetbrains.kotlinx.dataframe.io.readSourceImpl import org.jetbrains.kotlinx.dataframe.values import java.math.BigDecimal @@ -962,7 +960,7 @@ internal object Parsers : GlobalParserOptions { source = it, sourceType = typeOf(), options = null, - formats = newSupportedFormats, + formats = dataframeReadSources, resultKind = "DataRow", doStringToUrlConversion = isConverter, read = { source, sourceInfo, options -> @@ -982,7 +980,7 @@ internal object Parsers : GlobalParserOptions { source = it, sourceType = typeOf(), options = null, - formats = newSupportedFormats, + formats = dataframeReadSources, resultKind = "DataFrame", doStringToUrlConversion = isConverter, read = DataFrameReadSource::readDataFrame, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 
fbbc3ac97f..fb0d2fc829 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -13,6 +13,7 @@ import org.jetbrains.kotlinx.dataframe.api.CodeString import org.jetbrains.kotlinx.dataframe.api.generateInterfaces import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.single +import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import java.io.ByteArrayInputStream import java.io.File @@ -23,15 +24,24 @@ import java.net.URI import java.net.URL import java.nio.file.Path import java.util.ServiceLoader +import kotlin.io.path.Path +import kotlin.io.path.exists import kotlin.io.path.extension +import kotlin.io.path.isRegularFile import kotlin.io.path.name import kotlin.reflect.KType import kotlin.reflect.full.withNullability import kotlin.reflect.typeOf +public sealed interface DataFrameIO { + // `DataFrame.Companion.read/write` methods uses this to sort list of all supported formats in ascending order (-1, 2, 10) + // sorted list is used to test if any format can read/write the given input + public val testOrder: Int +} + public interface DataFrameReadOptions -public interface DataFrameReadSource { +public interface DataFrameReadSource : DataFrameIO { /** * The set of source [KType]s this format knows how to read. 
The framework uses this in the default * [acceptsSource] implementation, and overriding `acceptsSource` implementations should still consult it @@ -72,6 +82,43 @@ public interface DataFrameReadSource { public fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean } +internal typealias DataFrameReadSourceFunction = + DataFrameReadSource.( + source: Any, + sourceInfo: DataSourceInfo, + options: DataFrameReadOptions?, + ) -> Result + +public interface DataFrameWriteOptions + +public interface DataFrameWriteTarget : DataFrameIO { + public val supportedWritingTypes: Set + + public fun acceptsTarget(sourceInfo: DataSourceInfo, options: DataFrameWriteOptions?): Boolean + + public fun writeDataFrame( + dataFrame: DataFrame<*>, + target: Any, + targetInfo: DataSourceInfo, + options: DataFrameWriteOptions? = null, + ): Result + + public fun writeDataRow( + dataRow: DataRow<*>, + target: Any, + targetInfo: DataSourceInfo, + options: DataFrameWriteOptions? = null, + ): Result +} + +internal typealias DataFrameWriteTargetFunction = + DataFrameWriteTarget.( + dataFrameLike: T, + target: Any, + targetInfo: DataSourceInfo, + options: DataFrameWriteOptions?, + ) -> Result + /** * Description of a source passed to [DataFrameReadSource]. Carries the static [kType] of the value and * optional [extension]/[mimeType] hints, both of which may be `null` when the source is in-memory content @@ -84,22 +131,34 @@ public data class DataSourceInfo( public val mimeType: String? = null, ) +@PublishedApi +internal val dataFrameIO: List by lazy { + ( + ServiceLoader.load(DataFrameIO::class.java).toList() + + ServiceLoader.load(DataFrameReadSource::class.java).toList() + + ServiceLoader.load(DataFrameWriteTarget::class.java).toList() + ).distinct() + .sortedBy { it.testOrder } +} + /** * NOTE: Needs to have fully qualified name in * resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource * to be detected here. 
*/ @PublishedApi -internal val newSupportedFormats: List by lazy { - ServiceLoader.load(DataFrameReadSource::class.java) - .toList() - .distinct() - .sortedBy { it.testOrder } +internal val dataframeReadSources: List by lazy { + dataFrameIO.filterIsInstance() +} + +@PublishedApi +internal val dataframeWriteTargets: List by lazy { + dataFrameIO.filterIsInstance() } internal val dataFrameReadSourceByType: Map> by lazy { buildMap> { - newSupportedFormats.forEach { format -> + dataframeReadSources.forEach { format -> format.supportedReadingTypes.forEach { type -> getOrPut(type) { mutableListOf() }.let { if (format !in it) it += format @@ -112,6 +171,21 @@ internal val dataFrameReadSourceByType: Map> by } } +internal val dataframeWriteTargetByType: Map> by lazy { + buildMap> { + dataframeWriteTargets.forEach { format -> + format.supportedWritingTypes.forEach { type -> + getOrPut(type) { mutableListOf() }.let { + if (format !in it) it += format + } + } + } + values.forEach { + it.sortBy { it.testOrder } + } + } +} + /** * Shared dispatch loop for [readDataFrameImpl] and [readDataFrameSchemaImpl]: handles String→URL * normalization, InputStream buffering, sorted iteration, and error aggregation. 
The per-format read @@ -127,14 +201,10 @@ internal fun readSourceImpl( formats: List, resultKind: String, doStringToUrlConversion: Boolean, - read: DataFrameReadSource.( - source: Any, - sourceInfo: DataSourceInfo, - options: DataFrameReadOptions?, - ) -> Result, + read: DataFrameReadSourceFunction, ): Result { - if (doStringToUrlConversion && source is String) { - val url = asUrlOrNull(source) + if (doStringToUrlConversion && sourceType == typeOf()) { + val url = asUrlOrNull(source as String) if (url != null) { return readSourceImpl( source = url, @@ -184,6 +254,64 @@ internal fun readSourceImpl( ) } +internal fun writeTargetImpl( + source: T, + target: Any, + targetType: KType, + options: DataFrameWriteOptions?, + formats: List, + sourceKind: String, + doStringToPathConversion: Boolean, + write: DataFrameWriteTargetFunction, +): Result { + if (doStringToPathConversion && targetType == typeOf()) { + val path = Path(target as String) + if (path.exists() && path.isRegularFile()) { + return writeTargetImpl( + source = source, + target = path, + targetType = typeOf(), + options = options, + formats = formats, + sourceKind = sourceKind, + doStringToPathConversion = true, + write = write, + ) + } + } + + val targetInfo = DataSourceInfo( + kType = targetType, + extension = target.extensionOrNull(), + mimeType = target.mimeTypeOrNull(), + ) + + val formats = formats.sortedBy { it.testOrder } + .filter { it.acceptsTarget(targetInfo, options) } + + if (formats.isEmpty()) { + return Result.failure( + IllegalStateException( + "Failed to find a suitable format for writing $sourceKind to target: $target, $targetInfo", + ), + ) + } + if (formats.size > 1) { + return Result.failure( + IllegalStateException( + "Multiple formats found for writing $sourceKind to target: $target, $targetInfo; ${ + formats.map { + it::class.simpleName + } + } . 
Please specify a `DataFrameWriteOptions` explicitly.", + ), + ) + } + val format = formats.single() + val result = format.write(source, target, targetInfo, options) + return result +} + /** * Unified entry point for the [DataFrameReadSource] framework: passes [source] through every registered * format until one reads it. @@ -200,7 +328,7 @@ public fun DataFrame.Companion.readSource( source: Any, type: KType, options: DataFrameReadOptions? = null, - formats: List = newSupportedFormats, + formats: List = dataframeReadSources, ): AnyFrame = readSourceImpl( source = source, @@ -215,14 +343,14 @@ public fun DataFrame.Companion.readSource( public inline fun DataRow.Companion.readSource( source: R, options: DataFrameReadOptions? = null, - formats: List = newSupportedFormats, + formats: List = dataframeReadSources, ): AnyRow = readSource(source = source, type = typeOf(), options = options, formats = formats) public fun DataRow.Companion.readSource( source: Any, type: KType, options: DataFrameReadOptions? = null, - formats: List = newSupportedFormats, + formats: List = dataframeReadSources, ): AnyRow = readSourceImpl( source = source, @@ -239,7 +367,7 @@ public fun DataRow.Companion.readSource( public inline fun DataFrame.Companion.readSource( source: R, options: DataFrameReadOptions? = null, - formats: List = newSupportedFormats, + formats: List = dataframeReadSources, ): AnyFrame = readSource( source = source, @@ -258,7 +386,7 @@ public fun DataFrameSchema.Companion.readSource( source: Any, type: KType, options: DataFrameReadOptions? = null, - formats: List = newSupportedFormats, + formats: List = dataframeReadSources, ): DataFrameSchema = readSourceImpl( source = source, @@ -273,7 +401,7 @@ public fun DataFrameSchema.Companion.readSource( public inline fun DataFrameSchema.Companion.readSource( source: R, options: DataFrameReadOptions? 
= null, - formats: List = newSupportedFormats, + formats: List = dataframeReadSources, ): DataFrameSchema = readSource( source = source, @@ -298,7 +426,7 @@ public fun CodeString.Companion.readSource( type: KType, name: String, options: DataFrameReadOptions? = null, - formats: List = newSupportedFormats, + formats: List = dataframeReadSources, ): CodeString = readSourceImpl( source = source, @@ -316,7 +444,7 @@ public inline fun CodeString.Companion.readSource( source: R, name: String, options: DataFrameReadOptions? = null, - formats: List = newSupportedFormats, + formats: List = dataframeReadSources, ): CodeString = readSource( source = source, @@ -326,6 +454,66 @@ public inline fun CodeString.Companion.readSource( formats = formats, ) +public fun DataFrame<*>.write( + target: Any, + type: KType, + options: DataFrameWriteOptions? = null, + formats: List = dataframeWriteTargets, +) { + writeTargetImpl( + source = this, + target = target, + targetType = type.withNullability(false), + options = options, + formats = formats, + sourceKind = "DataFrame", + doStringToPathConversion = true, + write = DataFrameWriteTarget::writeDataFrame, + ).getOrThrow() +} + +public inline fun DataFrame<*>.write( + target: W, + options: DataFrameWriteOptions? = null, + formats: List = dataframeWriteTargets, +): Unit = + write( + target = target, + type = typeOf(), + options = options, + formats = formats, + ) + +public fun DataRow<*>.write( + target: Any, + type: KType, + options: DataFrameWriteOptions? = null, + formats: List = dataframeWriteTargets, +) { + writeTargetImpl( + source = this, + target = target, + targetType = type.withNullability(false), + options = options, + formats = formats, + sourceKind = "DataRow", + doStringToPathConversion = true, + write = DataFrameWriteTarget::writeDataRow, + ).getOrThrow() +} + +public inline fun DataRow<*>.write( + target: W, + options: DataFrameWriteOptions? 
= null, + formats: List = dataframeWriteTargets, +): Unit = + write( + target = target, + type = typeOf(), + options = options, + formats = formats, + ) + private val tikaDetector by lazy { DefaultDetector() } internal fun Any.mimeTypeOrNull(): String? { diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index 84bee18a69..7c969e69aa 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -38,16 +38,16 @@ class Guess2 { Path("../data/participants.json").absolute().normalize().toUri().toURL(), ) shouldBe expected - val options = org.jetbrains.kotlinx.dataframe.io.Json.Options( + val readOptions = org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions( typeClashTactic = JSON.TypeClashTactic.ANY_COLUMNS, ) - DataFrame.readSource("../data/participants.json", options) shouldBe expected - DataFrame.readSource(Path("../data/participants.json"), options) shouldBe expected - DataFrame.readSource(File("../data/participants.json"), options) shouldBe expected + DataFrame.readSource("../data/participants.json", readOptions) shouldBe expected + DataFrame.readSource(Path("../data/participants.json"), readOptions) shouldBe expected + DataFrame.readSource(File("../data/participants.json"), readOptions) shouldBe expected DataFrame.readSource( Path("../data/participants.json").absolute().normalize().toUri().toURL(), - options, + readOptions, ) shouldBe expected } @@ -61,13 +61,13 @@ class Guess2 { DataFrame.readSource(file.inputStream()) shouldBe expected DataFrame.readSource(Json.decodeFromString(file.readText())) shouldBe expected - val options = org.jetbrains.kotlinx.dataframe.io.Json.Options( + val readOptions = org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions( typeClashTactic = JSON.TypeClashTactic.ANY_COLUMNS, ) - DataFrame.readSource(file.readText(), options) shouldBe expected - 
DataFrame.readSource(file.inputStream(), options) shouldBe expected - DataFrame.readSource(Json.decodeFromString(file.readText()), options) shouldBe expected + DataFrame.readSource(file.readText(), readOptions) shouldBe expected + DataFrame.readSource(file.inputStream(), readOptions) shouldBe expected + DataFrame.readSource(Json.decodeFromString(file.readText()), readOptions) shouldBe expected } @Test diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 630e6543b6..3d96e4fd4f 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -2,7 +2,9 @@ package org.jetbrains.kotlinx.dataframe.io import kotlinx.serialization.ExperimentalSerializationApi import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonArray import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.JsonObject import kotlinx.serialization.json.decodeFromStream import org.intellij.lang.annotations.Language import org.jetbrains.kotlinx.dataframe.AnyFrame @@ -29,16 +31,20 @@ import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS import java.io.File import java.io.InputStream +import java.io.OutputStream import java.net.URL import java.nio.file.Path import kotlin.io.path.writeText import kotlin.reflect.KType import kotlin.reflect.full.isSubtypeOf +import kotlin.reflect.full.isSupertypeOf import kotlin.reflect.typeOf -public class Json : DataFrameReadSource { +public class Json : + DataFrameReadSource, + DataFrameWriteTarget { - public data class Options( + public data class ReadOptions( val header: List = emptyList(), val typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, val keyValuePaths: List = emptyList(), @@ -55,6 +61,20 @@ public 
class Json : DataFrameReadSource { typeOf(), ) + public data class WriteOptions(val prettyPrint: Boolean = false) : DataFrameWriteOptions + + override val supportedWritingTypes: Set = + setOf( + typeOf(), + typeOf(), + typeOf(), + typeOf(), + // used like df.write({ json: JsonElement -> }) + typeOf>(), + typeOf>(), + typeOf>(), + ) + public companion object { internal const val EXTENSION = "json" internal val MIME_TYPES = setOf( @@ -65,8 +85,117 @@ public class Json : DataFrameReadSource { ) } + override fun acceptsTarget(sourceInfo: DataSourceInfo, options: DataFrameWriteOptions?): Boolean { + if (options != null && options !is WriteOptions) return false + if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false + if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false + return supportedWritingTypes.any { sourceInfo.kType.isSubtypeOf(it) } + } + + override fun writeDataRow( + dataRow: DataRow<*>, + target: Any, + targetInfo: DataSourceInfo, + options: DataFrameWriteOptions?, + ): Result = + runCatching { + val opts = (options ?: WriteOptions()) as WriteOptions + val kType = targetInfo.kType + + @Suppress("RedundantReturnKeyword") + return@runCatching when { + kType.isSubTypeOf() -> + dataRow.writeJson(path = target as Path, prettyPrint = opts.prettyPrint) + + kType.isSubTypeOf() -> + dataRow.writeJson(path = (target as File).toPath(), prettyPrint = opts.prettyPrint) + + kType.isSubTypeOf() -> + dataRow.writeJson(writer = target as Appendable, prettyPrint = opts.prettyPrint) + + kType.isSubTypeOf() -> + dataRow.writeJson(stream = target as OutputStream, prettyPrint = opts.prettyPrint) + + kType.isSubTypeOf>() -> { + (target as Function1).invoke( + dataRow.toJsonElement(prettyPrint = opts.prettyPrint), + ) + Unit + } + + kType.isSubTypeOf>() -> + return Result.failure( + IllegalArgumentException( + "Can only turn a single DataRow into a JsonObject. 
A DataFrame can only be converted to a JsonArray.", + ), + ) + + kType.isSubTypeOf>() -> { + (target as Function1).invoke( + dataRow.toJson(prettyPrint = opts.prettyPrint), + ) + Unit + } + + else -> return Result.failure( + IllegalStateException("Unsupported target type for JSON writing: $kType"), + ) + } + } + + override fun writeDataFrame( + dataFrame: DataFrame<*>, + target: Any, + targetInfo: DataSourceInfo, + options: DataFrameWriteOptions?, + ): Result = + runCatching { + val opts = (options ?: WriteOptions()) as WriteOptions + val kType = targetInfo.kType + + @Suppress("RedundantReturnKeyword") + return@runCatching when { + kType.isSubTypeOf() -> + dataFrame.writeJson(path = target as Path, prettyPrint = opts.prettyPrint) + + kType.isSubTypeOf() -> + dataFrame.writeJson(path = (target as File).toPath(), prettyPrint = opts.prettyPrint) + + kType.isSubTypeOf() -> + dataFrame.writeJson(writer = target as Appendable, prettyPrint = opts.prettyPrint) + + kType.isSubTypeOf() -> + dataFrame.writeJson(stream = target as OutputStream, prettyPrint = opts.prettyPrint) + + kType.isSubTypeOf>() -> { + (target as Function1).invoke( + dataFrame.toJsonElement(prettyPrint = opts.prettyPrint), + ) + Unit + } + + kType.isSubTypeOf>() -> + return Result.failure( + IllegalArgumentException( + "Can only turn a single DataRow into a JsonObject. 
A DataFrame can only be converted to a JsonArray.", + ), + ) + + kType.isSubTypeOf>() -> { + (target as Function1).invoke( + dataFrame.toJson(prettyPrint = opts.prettyPrint), + ) + Unit + } + + else -> return Result.failure( + IllegalStateException("Unsupported target type for JSON writing: $kType"), + ) + } + } + override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { - if (options != null && options !is Options) return false + if (options != null && options !is ReadOptions) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } @@ -79,7 +208,7 @@ public class Json : DataFrameReadSource { options: DataFrameReadOptions?, ): Result> = runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions val kType = sourceInfo.kType val url: URL? 
= when { @@ -143,6 +272,8 @@ public class Json : DataFrameReadSource { private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) +private inline fun KType.isSuperTypeOf(): Boolean = this.isSupertypeOf(typeOf()) + public class JSON( private val typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, private val keyValuePaths: List = emptyList(), @@ -457,6 +588,29 @@ public fun AnyFrame.toJson(prettyPrint: Boolean = false): String { return json.encodeToString(JsonElement.serializer(), encodeFrame(this@toJson)) } +public fun AnyFrame.toJsonElement(prettyPrint: Boolean = false): JsonArray { + val json = Json { + this.prettyPrint = prettyPrint + isLenient = true + allowSpecialFloatingPointValues = true + } + val res = json.encodeToJsonElement(JsonElement.serializer(), encodeFrame(this@toJsonElement)) + return res as JsonArray +} + +public fun AnyRow.toJsonElement(prettyPrint: Boolean = false): JsonObject { + val json = Json { + this.prettyPrint = prettyPrint + isLenient = true + allowSpecialFloatingPointValues = true + } + val res = json.encodeToJsonElement( + JsonElement.serializer(), + encodeRow(this@toJsonElement.df(), this@toJsonElement.index()), + ) + return res as JsonObject +} + /** * Converts the DataFrame to a JSON string representation with additional metadata about serialized data. * It is heavily used to implement some integration features in Kotlin Notebook IntelliJ IDEA plugin. 
@@ -568,6 +722,10 @@ public fun AnyFrame.writeJson(writer: Appendable, prettyPrint: Boolean = false) writer.append(toJson(prettyPrint)) } +public fun AnyFrame.writeJson(stream: OutputStream, prettyPrint: Boolean = false) { + stream.write(toJson(prettyPrint).toByteArray()) +} + public fun AnyRow.writeJson(file: File, prettyPrint: Boolean = false) { writeJson(file.toPath(), prettyPrint) } @@ -584,6 +742,10 @@ public fun AnyRow.writeJson(writer: Appendable, prettyPrint: Boolean = false) { writer.append(toJson(prettyPrint)) } +public fun AnyRow.writeJson(stream: OutputStream, prettyPrint: Boolean = false) { + stream.write(toJson(prettyPrint).toByteArray()) +} + private const val READ_JSON = "readJson" internal class DefaultReadJsonMethod(path: String?, arguments: MethodArguments) : diff --git a/dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource b/dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameIO similarity index 100% rename from dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameReadSource rename to dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.DataFrameIO From 9ea6776e5db3aa75993d8c92ad6dd4ceb8713899 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 28 May 2026 15:57:10 +0200 Subject: [PATCH 18/20] added some tests for df.write for json --- .../kotlinx/dataframe/impl/api/convert.kt | 6 +- .../kotlinx/dataframe/impl/api/parse.kt | 4 +- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 67 ++++---- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 153 ++++++++++++++++++ 4 files changed, 191 insertions(+), 39 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt index c734a6b101..29a4f452f8 100644 --- 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt @@ -223,7 +223,7 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n DataFrame.readSource( source = source, type = from, - options = null, + readOptions = null, ) } @@ -232,7 +232,7 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n DataRow.readSource( source = source, type = from, - options = null, + readOptions = null, ) } @@ -241,7 +241,7 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n DataFrameSchema.readSource( source = source, type = from, - options = null, + readOptions = null, ) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 6a01294d32..ee87240081 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -959,7 +959,7 @@ internal object Parsers : GlobalParserOptions { readSourceImpl( source = it, sourceType = typeOf(), - options = null, + readOptions = null, formats = dataframeReadSources, resultKind = "DataRow", doStringToUrlConversion = isConverter, @@ -979,7 +979,7 @@ internal object Parsers : GlobalParserOptions { readSourceImpl( source = it, sourceType = typeOf(), - options = null, + readOptions = null, formats = dataframeReadSources, resultKind = "DataFrame", doStringToUrlConversion = isConverter, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index fb0d2fc829..844c65f450 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -13,7 +13,6 @@ import 
org.jetbrains.kotlinx.dataframe.api.CodeString import org.jetbrains.kotlinx.dataframe.api.generateInterfaces import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.single -import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import java.io.ByteArrayInputStream import java.io.File @@ -187,7 +186,7 @@ internal val dataframeWriteTargetByType: Map> } /** - * Shared dispatch loop for [readDataFrameImpl] and [readDataFrameSchemaImpl]: handles String→URL + * Shared dispatch loop for each [readSource]: handles String→URL * normalization, InputStream buffering, sorted iteration, and error aggregation. The per-format read * operation is supplied as [read]; [resultKind] is used only in the "unknown source" error message. * @@ -197,7 +196,7 @@ internal val dataframeWriteTargetByType: Map> internal fun readSourceImpl( source: Any, sourceType: KType, - options: DataFrameReadOptions?, + readOptions: DataFrameReadOptions?, formats: List, resultKind: String, doStringToUrlConversion: Boolean, @@ -209,7 +208,7 @@ internal fun readSourceImpl( return readSourceImpl( source = url, sourceType = typeOf(), - options = options, + readOptions = readOptions, formats = formats, resultKind = resultKind, doStringToUrlConversion = true, @@ -239,8 +238,8 @@ internal fun readSourceImpl( val tries = mutableMapOf() formats.sortedBy { it.testOrder }.forEach { - if (!it.acceptsSource(sourceInfo, options)) return@forEach - val result = it.read(getSource(), sourceInfo, options) + if (!it.acceptsSource(sourceInfo, readOptions)) return@forEach + val result = it.read(getSource(), sourceInfo, readOptions) result .onSuccess { return Result.success(it) } .onFailure { e -> @@ -258,7 +257,7 @@ internal fun writeTargetImpl( source: T, target: Any, targetType: KType, - options: DataFrameWriteOptions?, + writeOptions: DataFrameWriteOptions?, formats: List, sourceKind: String, doStringToPathConversion: Boolean, @@ 
-271,7 +270,7 @@ internal fun writeTargetImpl( source = source, target = path, targetType = typeOf(), - options = options, + writeOptions = writeOptions, formats = formats, sourceKind = sourceKind, doStringToPathConversion = true, @@ -287,7 +286,7 @@ internal fun writeTargetImpl( ) val formats = formats.sortedBy { it.testOrder } - .filter { it.acceptsTarget(targetInfo, options) } + .filter { it.acceptsTarget(targetInfo, writeOptions) } if (formats.isEmpty()) { return Result.failure( @@ -308,7 +307,7 @@ internal fun writeTargetImpl( ) } val format = formats.single() - val result = format.write(source, target, targetInfo, options) + val result = format.write(source, target, targetInfo, writeOptions) return result } @@ -327,13 +326,13 @@ internal fun writeTargetImpl( public fun DataFrame.Companion.readSource( source: Any, type: KType, - options: DataFrameReadOptions? = null, + readOptions: DataFrameReadOptions? = null, formats: List = dataframeReadSources, ): AnyFrame = readSourceImpl( source = source, sourceType = type.withNullability(false), - options = options, + readOptions = readOptions, formats = formats, resultKind = "DataFrame", doStringToUrlConversion = true, @@ -342,20 +341,20 @@ public fun DataFrame.Companion.readSource( public inline fun DataRow.Companion.readSource( source: R, - options: DataFrameReadOptions? = null, + readOptions: DataFrameReadOptions? = null, formats: List = dataframeReadSources, -): AnyRow = readSource(source = source, type = typeOf(), options = options, formats = formats) +): AnyRow = readSource(source = source, type = typeOf(), readOptions = readOptions, formats = formats) public fun DataRow.Companion.readSource( source: Any, type: KType, - options: DataFrameReadOptions? = null, + readOptions: DataFrameReadOptions? 
= null, formats: List = dataframeReadSources, ): AnyRow = readSourceImpl( source = source, sourceType = type.withNullability(false), - options = options, + readOptions = readOptions, formats = formats, resultKind = "DataRow", doStringToUrlConversion = true, @@ -366,13 +365,13 @@ public fun DataRow.Companion.readSource( public inline fun DataFrame.Companion.readSource( source: R, - options: DataFrameReadOptions? = null, + readOptions: DataFrameReadOptions? = null, formats: List = dataframeReadSources, ): AnyFrame = readSource( source = source, type = typeOf(), - options = options, + readOptions = readOptions, formats = formats, ) @@ -385,13 +384,13 @@ public inline fun DataFrame.Companion.readSource( public fun DataFrameSchema.Companion.readSource( source: Any, type: KType, - options: DataFrameReadOptions? = null, + readOptions: DataFrameReadOptions? = null, formats: List = dataframeReadSources, ): DataFrameSchema = readSourceImpl( source = source, sourceType = type.withNullability(false), - options = options, + readOptions = readOptions, formats = formats, resultKind = "DataFrameSchema", doStringToUrlConversion = true, @@ -400,13 +399,13 @@ public fun DataFrameSchema.Companion.readSource( public inline fun DataFrameSchema.Companion.readSource( source: R, - options: DataFrameReadOptions? = null, + readOptions: DataFrameReadOptions? = null, formats: List = dataframeReadSources, ): DataFrameSchema = readSource( source = source, type = typeOf(), - options = options, + readOptions = readOptions, formats = formats, ) @@ -425,13 +424,13 @@ public fun CodeString.Companion.readSource( source: Any, type: KType, name: String, - options: DataFrameReadOptions? = null, + readOptions: DataFrameReadOptions? 
= null, formats: List = dataframeReadSources, ): CodeString = readSourceImpl( source = source, sourceType = type.withNullability(false), - options = options, + readOptions = readOptions, formats = formats, resultKind = "CodeString", doStringToUrlConversion = true, @@ -443,28 +442,28 @@ public fun CodeString.Companion.readSource( public inline fun CodeString.Companion.readSource( source: R, name: String, - options: DataFrameReadOptions? = null, + readOptions: DataFrameReadOptions? = null, formats: List = dataframeReadSources, ): CodeString = readSource( source = source, type = typeOf(), name = name, - options = options, + readOptions = readOptions, formats = formats, ) public fun DataFrame<*>.write( target: Any, type: KType, - options: DataFrameWriteOptions? = null, + writeOptions: DataFrameWriteOptions? = null, formats: List = dataframeWriteTargets, ) { writeTargetImpl( source = this, target = target, targetType = type.withNullability(false), - options = options, + writeOptions = writeOptions, formats = formats, sourceKind = "DataFrame", doStringToPathConversion = true, @@ -474,27 +473,27 @@ public fun DataFrame<*>.write( public inline fun DataFrame<*>.write( target: W, - options: DataFrameWriteOptions? = null, + writeOptions: DataFrameWriteOptions? = null, formats: List = dataframeWriteTargets, ): Unit = write( target = target, type = typeOf(), - options = options, + writeOptions = writeOptions, formats = formats, ) public fun DataRow<*>.write( target: Any, type: KType, - options: DataFrameWriteOptions? = null, + writeOptions: DataFrameWriteOptions? = null, formats: List = dataframeWriteTargets, ) { writeTargetImpl( source = this, target = target, targetType = type.withNullability(false), - options = options, + writeOptions = writeOptions, formats = formats, sourceKind = "DataRow", doStringToPathConversion = true, @@ -504,13 +503,13 @@ public fun DataRow<*>.write( public inline fun DataRow<*>.write( target: W, - options: DataFrameWriteOptions? 
= null, + writeOptions: DataFrameWriteOptions? = null, formats: List = dataframeWriteTargets, ): Unit = write( target = target, type = typeOf(), - options = options, + writeOptions = writeOptions, formats = formats, ) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index 7c969e69aa..03131803e6 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -1,9 +1,12 @@ package org.jetbrains.kotlinx.dataframe.io +import io.kotest.assertions.throwables.shouldThrow import io.kotest.matchers.shouldBe import io.kotest.matchers.string.shouldContain import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonArray import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.JsonObject import org.apache.poi.ss.usermodel.WorkbookFactory import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow @@ -15,10 +18,13 @@ import org.jetbrains.kotlinx.dataframe.api.named import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.single import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.io.Json.WriteOptions import org.jetbrains.kotlinx.dataframe.io.db.H2 import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import org.junit.Test +import java.io.ByteArrayOutputStream import java.io.File +import java.nio.file.Files import java.sql.Connection import java.sql.DriverManager import javax.sql.DataSource @@ -641,4 +647,151 @@ class Guess2 { } // endregion + + // region DataFrame.write / DataRow.write — write to various JSON targets + + @Test + fun `write DataFrame as JSON to Path`() { + val df = DataFrame.readJson("../data/participants.json") + val tempPath = Files.createTempFile("guess2-write-df", ".json") + .also { it.toFile().deleteOnExit() } + 
df.write(tempPath) + DataFrame.readJson(tempPath) shouldBe df + } + + @Test + fun `write DataFrame as JSON to File`() { + val df = DataFrame.readJson("../data/participants.json") + val tempFile = Files.createTempFile("guess2-write-df", ".json").toFile() + .also { it.deleteOnExit() } + df.write(tempFile) + DataFrame.readJson(tempFile) shouldBe df + } + + @Test + fun `write DataFrame as JSON to String pointing at existing file`() { + // doStringToPathConversion in writeTargetImpl only fires when the path already exists; + // createTempFile creates the file, so the String → Path routing kicks in. + val df = DataFrame.readJson("../data/participants.json") + val tempFile = Files.createTempFile("guess2-write-df-str", ".json").toFile() + .also { it.deleteOnExit() } + df.write(tempFile.path) + DataFrame.readJson(tempFile) shouldBe df + } + + @Test + fun `write DataFrame as JSON to Appendable`() { + val df = DataFrame.readJson("../data/participants.json") + val sb = StringBuilder() + // StringBuilder is reified — pin Appendable so the framework dispatches to that branch. + df.write(sb) + DataFrame.readJsonStr(sb.toString()) shouldBe df + } + + @Test + fun `write DataFrame as JSON to OutputStream`() { + val df = DataFrame.readJson("../data/participants.json") + val baos = ByteArrayOutputStream() + df.write(baos) + DataFrame.readJsonStr(baos.toString()) shouldBe df + } + + @Test + fun `write DataFrame as JSON to Function1 of JsonArray`() { + val df = DataFrame.readJson("../data/participants.json") + var captured: JsonArray? = null + df.write({ it: JsonArray -> captured = it }) + captured shouldBe df.toJsonElement() + } + + @Test + fun `write DataFrame as JSON to Function1 of String`() { + val df = DataFrame.readJson("../data/participants.json") + var captured: String? 
= null + df.write({ it: String -> captured = it }) + captured shouldBe df.toJson() + } + + @Test + fun `write DataFrame as JSON to Function1 of JsonObject fails`() { + // A DataFrame can only be converted to a JsonArray, not a JsonObject. + val df = DataFrame.readJson("../data/participants.json") + shouldThrow { df.write({ _: JsonObject -> }) } + } + + @Test + fun `write DataRow as JSON to Path`() { + val row = DataFrame.readJsonStr("""[{"a": 1, "b": "x"}]""").single() + val tempPath = Files.createTempFile("guess2-write-row", ".json") + .also { it.toFile().deleteOnExit() } + row.write(tempPath) + DataRow.readJson(tempPath) shouldBe row + } + + @Test + fun `write DataRow as JSON to File`() { + val row = DataFrame.readJsonStr("""[{"a": 1, "b": "x"}]""").single() + val tempFile = Files.createTempFile("guess2-write-row", ".json").toFile() + .also { it.deleteOnExit() } + row.write(tempFile) + DataRow.readJson(tempFile) shouldBe row + } + + @Test + fun `write DataRow as JSON to Appendable`() { + val row = DataFrame.readJsonStr("""[{"a": 1, "b": "x"}]""").single() + val sb = StringBuilder() + row.write(sb) + sb.toString() shouldBe row.toJson() + } + + @Test + fun `write DataRow as JSON to OutputStream`() { + val row = DataFrame.readJsonStr("""[{"a": 1, "b": "x"}]""").single() + val baos = ByteArrayOutputStream() + row.write(baos) + baos.toString() shouldBe row.toJson() + } + + @Test + fun `write DataRow as JSON to Function1 of JsonObject`() { + val row = DataFrame.readJsonStr("""[{"a": 1, "b": "x"}]""").single() + var captured: JsonObject? = null + row.write({ it: JsonObject -> captured = it }) + captured shouldBe row.toJsonElement() + } + + @Test + fun `write DataRow as JSON to Function1 of String`() { + val row = DataFrame.readJsonStr("""[{"a": 1, "b": "x"}]""").single() + var captured: String? 
= null + row.write({ it: String -> captured = it }) + captured shouldBe row.toJson() + } + + @Test + fun `write DataRow as JSON to Function1 of JsonArray fails`() { + // A single DataRow can only be turned into a JsonObject, not a JsonArray. + val row = DataFrame.readJsonStr("""[{"a": 1, "b": "x"}]""").single() + shouldThrow { row.write({ _: JsonArray -> }) } + } + + @Test + fun `write DataFrame as JSON with prettyPrint option produces multi-line output`() { + val df = DataFrame.readJsonStr("""[{"a": 1, "b": "x"}]""") + val sb = StringBuilder() + df.write(sb, WriteOptions(prettyPrint = true)) + sb.toString() shouldContain "\n" + DataFrame.readJsonStr(sb.toString()) shouldBe df + } + + @Test + fun `write DataFrame with unsupported target type fails`() { + // Int is not a supported writing type for any registered format → no format accepts it, + // and writeTargetImpl reports "Failed to find a suitable format". + val df = DataFrame.readJsonStr("""[{"a": 1}]""") + shouldThrow { df.write(42) } + } + + // endregion } From 32a5a5889b8e02d9ca4c7fa23f04fdbf341cc190 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 28 May 2026 16:25:37 +0200 Subject: [PATCH 19/20] rename all read-Options classes to ReadOptions --- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 44 +++++++++---------- dataframe-arrow/api/dataframe-arrow.api | 18 ++++---- .../kotlinx/dataframe/io/arrowReading.kt | 20 ++++----- dataframe-csv/api/dataframe-csv.api | 12 ++--- .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 6 +-- .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 6 +-- dataframe-excel/api/dataframe-excel.api | 6 +-- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 6 +-- dataframe-jdbc/api/dataframe-jdbc.api | 6 +-- .../jetbrains/kotlinx/dataframe/io/Jdbc.kt | 26 +++++------ dataframe-json/api/dataframe-json.api | 6 +-- .../jetbrains/kotlinx/dataframe/io/json.kt | 30 ++++++++++--- .../api/dataframe-openapi-generator.api | 6 +-- .../jetbrains/kotlinx/dataframe/io/OpenApi.kt | 6 +-- 14 files changed, 
109 insertions(+), 89 deletions(-) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index 03131803e6..2e2efecaf1 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -88,7 +88,7 @@ class Guess2 { Path(csvPath).absolute().normalize().toUri().toURL(), ) shouldBe expected - val options = Csv.Options(delimiter = ',') + val options = Csv.ReadOptions(delimiter = ',') DataFrame.readSource(csvPath, options) shouldBe expected DataFrame.readSource(Path(csvPath), options) shouldBe expected @@ -105,7 +105,7 @@ class Guess2 { val expected = DataFrame.readCsv(file) // String content has no extension hint, so we pin the format via options. - val options = Csv.Options(delimiter = ',') + val options = Csv.ReadOptions(delimiter = ',') DataFrame.readSource(file.readText(), options) shouldBe expected DataFrame.readSource(file.inputStream(), options) shouldBe expected @@ -123,7 +123,7 @@ class Guess2 { Path(tsvFile.path).absolute().normalize().toUri().toURL(), ) shouldBe expected - val options = Tsv.Options(delimiter = '\t') + val options = Tsv.ReadOptions(delimiter = '\t') DataFrame.readSource(tsvFile.path, options) shouldBe expected DataFrame.readSource(Path(tsvFile.path), options) shouldBe expected @@ -138,7 +138,7 @@ class Guess2 { fun `read TSV in memory`() { val tsvFile = File("src/test/resources/abc.tsv") val expected = DataFrame.readTsv(tsvFile) - val options = Tsv.Options(delimiter = '\t') + val options = Tsv.ReadOptions(delimiter = '\t') // Binary/text without extension — options pin Tsv over Csv/Json/Xlsx. 
DataFrame.readSource(tsvFile.readText(), options) shouldBe expected @@ -157,7 +157,7 @@ class Guess2 { Path(xlsxFile.path).absolute().normalize().toUri().toURL(), ) shouldBe expected - val options = ExcelNEW.Options(sheetName = "Sheet1") + val options = ExcelNEW.ReadOptions(sheetName = "Sheet1") DataFrame.readSource(xlsxFile.path, options) shouldBe expected DataFrame.readSource(Path(xlsxFile.path), options) shouldBe expected @@ -192,7 +192,7 @@ class Guess2 { DataFrame.readSource(wb.getSheetAt(0)) shouldBe expected } - val options = ExcelNEW.Options() + val options = ExcelNEW.ReadOptions() // Binary streams have no extension and are accepted by every format, // so options are needed to pin ExcelNEW for the InputStream variant. @@ -234,8 +234,8 @@ class Guess2 { seed(conn) val expected = DataFrame.readSqlTable(conn, "Customer") - val tableOpts = Jdbc2.Options(sqlQueryOrTableName = "Customer") - val queryOpts = Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer") + val tableOpts = Jdbc2.ReadOptions(sqlQueryOrTableName = "Customer") + val queryOpts = Jdbc2.ReadOptions(sqlQueryOrTableName = "SELECT * FROM Customer") // Connection — exclusive type, but query/table name must come from options. 
DataFrame.readSource(conn, tableOpts) shouldBe expected @@ -273,7 +273,7 @@ class Guess2 { ps.executeQuery().use { rs -> DataFrame.readSource( rs, - Jdbc2.Options(dbType = H2()), + Jdbc2.ReadOptions(dbType = H2()), ) shouldBe expected } } @@ -281,7 +281,7 @@ class Guess2 { ps.executeQuery().use { rs -> DataFrame.readSource( rs, - Jdbc2.Options(resultSetConnection = conn), + Jdbc2.ReadOptions(resultSetConnection = conn), ) shouldBe expected } } @@ -317,10 +317,10 @@ class Guess2 { val config = DbConnectionConfig(url = url) val expected = DataFrame.readSqlTable(config, "Customer") - val tableOpts = Jdbc2.Options(sqlQueryOrTableName = "Customer") + val tableOpts = Jdbc2.ReadOptions(sqlQueryOrTableName = "Customer") DataFrame.readSource(config, tableOpts) shouldBe expected - DataFrame.readSource(config, Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer")) shouldBe expected + DataFrame.readSource(config, Jdbc2.ReadOptions(sqlQueryOrTableName = "SELECT * FROM Customer")) shouldBe expected } @Test @@ -357,8 +357,8 @@ class Guess2 { DriverManager.getConnection(url).use { conn -> seed(conn) val expected = DataFrameSchema.readSqlTable(conn, "Customer") - val tableOpts = Jdbc2.Options(sqlQueryOrTableName = "Customer") - val queryOpts = Jdbc2.Options(sqlQueryOrTableName = "SELECT * FROM Customer") + val tableOpts = Jdbc2.ReadOptions(sqlQueryOrTableName = "Customer") + val queryOpts = Jdbc2.ReadOptions(sqlQueryOrTableName = "SELECT * FROM Customer") DataFrameSchema.readSource(conn, tableOpts) shouldBe expected DataFrameSchema.readSource(conn, queryOpts) shouldBe expected @@ -384,7 +384,7 @@ class Guess2 { conn.prepareStatement("SELECT * FROM Customer").executeQuery(), H2(), ) - val schema = DataFrameSchema.readSource(rs, Jdbc2.Options(dbType = H2())) + val schema = DataFrameSchema.readSource(rs, Jdbc2.ReadOptions(dbType = H2())) schema shouldBe expected rs.isBeforeFirst shouldBe true } @@ -404,7 +404,7 @@ class Guess2 { 
Path(featherFile.path).absolute().normalize().toUri().toURL(), ) shouldBe expected - val options = ArrowFeatherNEW.Options() + val options = ArrowFeatherNEW.ReadOptions() DataFrame.readSource(featherFile.path, options) shouldBe expected DataFrame.readSource(featherFile, options) shouldBe expected @@ -414,7 +414,7 @@ class Guess2 { fun `read Arrow Feather in memory`() { val featherFile = File("src/test/resources/test.feather") val expected = DataFrame.readArrowFeather(featherFile) - val options = ArrowFeatherNEW.Options() + val options = ArrowFeatherNEW.ReadOptions() // ByteArray, InputStream, SeekableByteChannel all need options to disambiguate (no extension). DataFrame.readSource(featherFile.readBytes(), options) shouldBe expected @@ -436,7 +436,7 @@ class Guess2 { Path(ipcFile.path).absolute().normalize().toUri().toURL(), ) shouldBe expected - val options = ArrowIPC.Options() + val options = ArrowIPC.ReadOptions() DataFrame.readSource(ipcFile, options) shouldBe expected } @@ -452,7 +452,7 @@ class Guess2 { Path(parquetFile.path).absolute().normalize().toUri().toURL(), ) shouldBe expected - val options = Parquet.Options() + val options = Parquet.ReadOptions() DataFrame.readSource(parquetFile, options) shouldBe expected } @@ -505,14 +505,14 @@ class Guess2 { fun `read DataRow from CSV string`() { val csvText = "a,b,c\n1,2,3" val expected = DataFrame.readCsvStr(csvText).single() - DataRow.readSource(csvText, Csv.Options()) shouldBe expected + DataRow.readSource(csvText, Csv.ReadOptions()) shouldBe expected } @Test fun `read DataRow from TSV string`() { val tsvText = "a\tb\tc\n1\t2\t3" val expected = DataFrame.readTsvStr(tsvText).single() - DataRow.readSource(tsvText, Tsv.Options()) shouldBe expected + DataRow.readSource(tsvText, Tsv.ReadOptions()) shouldBe expected } @Test @@ -538,7 +538,7 @@ class Guess2 { seed(conn) val query = "SELECT * FROM Customer WHERE id = 1" val expected = DataFrame.readSqlQuery(conn, query).single() - DataRow.readSource(conn, 
Jdbc2.Options(sqlQueryOrTableName = query)) shouldBe expected + DataRow.readSource(conn, Jdbc2.ReadOptions(sqlQueryOrTableName = query)) shouldBe expected } } diff --git a/dataframe-arrow/api/dataframe-arrow.api b/dataframe-arrow/api/dataframe-arrow.api index 4a098bd002..ffc8fe35d5 100644 --- a/dataframe-arrow/api/dataframe-arrow.api +++ b/dataframe-arrow/api/dataframe-arrow.api @@ -24,13 +24,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW : org/jetb public final class org/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Companion { } -public final class org/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +public final class org/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$ReadOptions : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { public fun ()V public fun (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)V public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILkotlin/jvm/internal/DefaultConstructorMarker;)V public final fun component1 ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; - public final fun copy (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Options; - public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Options;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$Options; + public final fun copy (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$ReadOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$ReadOptions;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowFeatherNEW$ReadOptions; public fun equals (Ljava/lang/Object;)Z public final fun 
getNullability ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; public fun hashCode ()I @@ -52,14 +52,14 @@ public final class org/jetbrains/kotlinx/dataframe/io/ArrowIPC : org/jetbrains/k public final class org/jetbrains/kotlinx/dataframe/io/ArrowIPC$Companion { } -public final class org/jetbrains/kotlinx/dataframe/io/ArrowIPC$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +public final class org/jetbrains/kotlinx/dataframe/io/ArrowIPC$ReadOptions : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { public fun ()V public fun (Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)V public synthetic fun (Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILkotlin/jvm/internal/DefaultConstructorMarker;)V public final fun component1 ()Lorg/apache/arrow/memory/RootAllocator; public final fun component2 ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; - public final fun copy (Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$Options; - public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$Options;Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$Options; + public final fun copy (Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$ReadOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$ReadOptions;Lorg/apache/arrow/memory/RootAllocator;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/ArrowIPC$ReadOptions; public fun equals (Ljava/lang/Object;)Z public final fun getAllocator ()Lorg/apache/arrow/memory/RootAllocator; 
public final fun getNullability ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; @@ -396,14 +396,14 @@ public final class org/jetbrains/kotlinx/dataframe/io/Parquet : org/jetbrains/ko public final class org/jetbrains/kotlinx/dataframe/io/Parquet$Companion { } -public final class org/jetbrains/kotlinx/dataframe/io/Parquet$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +public final class org/jetbrains/kotlinx/dataframe/io/Parquet$ReadOptions : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { public fun ()V public fun (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;J)V public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;JILkotlin/jvm/internal/DefaultConstructorMarker;)V public final fun component1 ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; public final fun component2 ()J - public final fun copy (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;J)Lorg/jetbrains/kotlinx/dataframe/io/Parquet$Options; - public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Parquet$Options;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;JILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Parquet$Options; + public final fun copy (Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;J)Lorg/jetbrains/kotlinx/dataframe/io/Parquet$ReadOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Parquet$ReadOptions;Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions;JILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Parquet$ReadOptions; public fun equals (Ljava/lang/Object;)Z public final fun getBatchSize ()J public final fun getNullability ()Lorg/jetbrains/kotlinx/dataframe/api/NullabilityOptions; diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt index 94df26773e..2902a29a25 100644 --- 
a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt @@ -47,11 +47,11 @@ public class ArrowFeather : SupportedDataFrameFormat { * - In-memory: [SeekableByteChannel], [ByteArray], [InputStream], [ArrowReader] * * Default-accepts the `.feather` extension. To read with no extension hint (e.g., an [InputStream]) pass - * an [Options] instance to disambiguate from text formats. + * a [ReadOptions] instance to disambiguate from text formats. */ public class ArrowFeatherNEW : DataFrameReadSource { - public data class Options(val nullability: NullabilityOptions = NullabilityOptions.Infer) : DataFrameReadOptions + public data class ReadOptions(val nullability: NullabilityOptions = NullabilityOptions.Infer) : DataFrameReadOptions override val supportedReadingTypes: Set = setOf( @@ -69,7 +69,7 @@ public class ArrowFeatherNEW : DataFrameReadSource { } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { - if (options != null && options !is Options) return false + if (options != null && options !is ReadOptions) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } @@ -80,7 +80,7 @@ public class ArrowFeatherNEW : DataFrameReadSource { options: DataFrameReadOptions?, ): Result> = runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions val kType = sourceInfo.kType // ArrowReader is exclusive; check before more general types. 
@@ -136,7 +136,7 @@ public class ArrowFeatherNEW : DataFrameReadSource { */ public class ArrowIPC : DataFrameReadSource { - public data class Options( + public data class ReadOptions( val allocator: RootAllocator = Allocator.ROOT, val nullability: NullabilityOptions = NullabilityOptions.Infer, ) : DataFrameReadOptions @@ -157,7 +157,7 @@ public class ArrowIPC : DataFrameReadSource { } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { - if (options != null && options !is Options) return false + if (options != null && options !is ReadOptions) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } @@ -168,7 +168,7 @@ public class ArrowIPC : DataFrameReadSource { options: DataFrameReadOptions?, ): Result> = runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions val kType = sourceInfo.kType if (kType.isSubTypeOf()) { @@ -222,7 +222,7 @@ public class ArrowIPC : DataFrameReadSource { */ public class Parquet : DataFrameReadSource { - public data class Options( + public data class ReadOptions( val nullability: NullabilityOptions = NullabilityOptions.Infer, val batchSize: Long = ARROW_PARQUET_DEFAULT_BATCH_SIZE, ) : DataFrameReadOptions @@ -239,7 +239,7 @@ public class Parquet : DataFrameReadSource { } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { - if (options != null && options !is Options) return false + if (options != null && options !is ReadOptions) return false if (sourceInfo.extension?.lowercase()?.equals(EXTENSION) == false) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } @@ -251,7 +251,7 @@ public class Parquet : DataFrameReadSource { options: DataFrameReadOptions?, ): Result> = 
runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions val kType = sourceInfo.kType return@runCatching when { kType.isSubTypeOf() -> diff --git a/dataframe-csv/api/dataframe-csv.api b/dataframe-csv/api/dataframe-csv.api index d8c4ca4133..978124af85 100644 --- a/dataframe-csv/api/dataframe-csv.api +++ b/dataframe-csv/api/dataframe-csv.api @@ -13,7 +13,7 @@ public final class org/jetbrains/kotlinx/dataframe/io/Csv : org/jetbrains/kotlin public final class org/jetbrains/kotlinx/dataframe/io/Csv$Companion { } -public final class org/jetbrains/kotlinx/dataframe/io/Csv$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +public final class org/jetbrains/kotlinx/dataframe/io/Csv$ReadOptions : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { public fun ()V public fun (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)V public synthetic fun (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILkotlin/jvm/internal/DefaultConstructorMarker;)V @@ -31,8 +31,8 @@ public final class org/jetbrains/kotlinx/dataframe/io/Csv$Options : org/jetbrain public final fun component7 ()Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; public final fun component8 ()Z public final fun component9 ()Z - public final fun copy (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/io/Csv$Options; - public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Csv$Options;CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Csv$Options; + public final fun copy 
(CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/io/Csv$ReadOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Csv$ReadOptions;CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Csv$ReadOptions; public fun equals (Ljava/lang/Object;)Z public final fun getAllowMissingColumns ()Z public final fun getCharset ()Ljava/nio/charset/Charset; @@ -190,7 +190,7 @@ public final class org/jetbrains/kotlinx/dataframe/io/Tsv : org/jetbrains/kotlin public final class org/jetbrains/kotlinx/dataframe/io/Tsv$Companion { } -public final class org/jetbrains/kotlinx/dataframe/io/Tsv$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +public final class org/jetbrains/kotlinx/dataframe/io/Tsv$ReadOptions : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { public fun ()V public fun (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)V public synthetic fun (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILkotlin/jvm/internal/DefaultConstructorMarker;)V @@ -208,8 +208,8 @@ public final class org/jetbrains/kotlinx/dataframe/io/Tsv$Options : org/jetbrain public final fun component7 ()Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; public final fun component8 ()Z public final fun component9 ()Z - public final fun copy (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/io/Tsv$Options; - public static synthetic fun copy$default 
(Lorg/jetbrains/kotlinx/dataframe/io/Tsv$Options;CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Tsv$Options; + public final fun copy (CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/io/Tsv$ReadOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Tsv$ReadOptions;CLjava/util/List;Ljava/nio/charset/Charset;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Tsv$ReadOptions; public fun equals (Ljava/lang/Object;)Z public final fun getAllowMissingColumns ()Z public final fun getCharset ()Ljava/nio/charset/Charset; diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 67b33dc628..1dc1250c4f 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -40,7 +40,7 @@ public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITE public class Csv : DataFrameReadSource { - public data class Options( + public data class ReadOptions( val delimiter: Char = DelimParams.CSV_DELIMITER, val header: List = DelimParams.HEADER, val charset: Charset? 
= DelimParams.CHARSET, @@ -70,7 +70,7 @@ public class Csv : DataFrameReadSource { } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { - if (options != null && options !is Options) return false + if (options != null && options !is ReadOptions) return false if (sourceInfo.extension != null && sourceInfo.extension !in EXTENSIONS) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } @@ -82,7 +82,7 @@ public class Csv : DataFrameReadSource { options: DataFrameReadOptions?, ): Result> = runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions val kType = sourceInfo.kType val url: URL? = when { diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index 9e70aee554..567ec78f66 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -40,7 +40,7 @@ public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITE public class Tsv : DataFrameReadSource { - public data class Options( + public data class ReadOptions( val delimiter: Char = DelimParams.TSV_DELIMITER, val header: List = DelimParams.HEADER, val charset: Charset? 
= DelimParams.CHARSET, @@ -70,7 +70,7 @@ public class Tsv : DataFrameReadSource { } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { - if (options != null && options !is Options) return false + if (options != null && options !is ReadOptions) return false if (sourceInfo.extension != null && sourceInfo.extension !in EXTENSIONS) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPE) return false return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } @@ -82,7 +82,7 @@ public class Tsv : DataFrameReadSource { options: DataFrameReadOptions?, ): Result> = runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions val kType = sourceInfo.kType val url: URL? = when { diff --git a/dataframe-excel/api/dataframe-excel.api b/dataframe-excel/api/dataframe-excel.api index 0320ffc31d..9d29cb5790 100644 --- a/dataframe-excel/api/dataframe-excel.api +++ b/dataframe-excel/api/dataframe-excel.api @@ -24,7 +24,7 @@ public final class org/jetbrains/kotlinx/dataframe/io/ExcelNEW : org/jetbrains/k public final class org/jetbrains/kotlinx/dataframe/io/ExcelNEW$Companion { } -public final class org/jetbrains/kotlinx/dataframe/io/ExcelNEW$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +public final class org/jetbrains/kotlinx/dataframe/io/ExcelNEW$ReadOptions : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { public synthetic fun (Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILkotlin/jvm/internal/DefaultConstructorMarker;)V public synthetic fun (Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZLkotlin/jvm/internal/DefaultConstructorMarker;)V public final fun component1 ()Ljava/lang/String; @@ -35,8 +35,8 @@ public final class 
org/jetbrains/kotlinx/dataframe/io/ExcelNEW$Options : org/jet public final fun component6 ()Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy; public final fun component7 ()Z public final fun component8 ()Z - public final fun copy-vOPuZIo (Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZ)Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$Options; - public static synthetic fun copy-vOPuZIo$default (Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$Options;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$Options; + public final fun copy-vOPuZIo (Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZ)Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$ReadOptions; + public static synthetic fun copy-vOPuZIo$default (Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$ReadOptions;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/ExcelNEW$ReadOptions; public fun equals (Ljava/lang/Object;)Z public final fun getColumns ()Ljava/lang/String; public final fun getFirstRowIsHeader ()Z diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index b4efa33926..f0c27dd1a6 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -69,7 +69,7 @@ public class Excel : SupportedDataFrameFormat { public class ExcelNEW : DataFrameReadSource { - public data class Options( + public data class ReadOptions( val sheetName: String? 
= null, val skipRows: Int = 0, val columns: String? = null, @@ -103,7 +103,7 @@ public class ExcelNEW : DataFrameReadSource { } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { - if (options != null && options !is Options) return false + if (options != null && options !is ReadOptions) return false val ext = sourceInfo.extension?.lowercase() if (ext != null && ext !in EXTENSIONS) return false val mime = sourceInfo.mimeType?.lowercase() @@ -117,7 +117,7 @@ public class ExcelNEW : DataFrameReadSource { options: DataFrameReadOptions?, ): Result> = runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions val kType = sourceInfo.kType val url: URL? = when { diff --git a/dataframe-jdbc/api/dataframe-jdbc.api b/dataframe-jdbc/api/dataframe-jdbc.api index 4b3c3fc4e3..104fab5d6c 100644 --- a/dataframe-jdbc/api/dataframe-jdbc.api +++ b/dataframe-jdbc/api/dataframe-jdbc.api @@ -36,7 +36,7 @@ public final class org/jetbrains/kotlinx/dataframe/io/Jdbc2 : org/jetbrains/kotl public fun toString ()Ljava/lang/String; } -public final class org/jetbrains/kotlinx/dataframe/io/Jdbc2$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +public final class org/jetbrains/kotlinx/dataframe/io/Jdbc2$ReadOptions : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { public fun ()V public fun (Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;)V public synthetic fun (Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;ILkotlin/jvm/internal/DefaultConstructorMarker;)V @@ -47,8 +47,8 @@ public final class org/jetbrains/kotlinx/dataframe/io/Jdbc2$Options : org/jetbra public final fun component5 ()Z public final fun component6 ()Lkotlin/jvm/functions/Function1; public final fun component7 
()Ljava/sql/Connection; - public final fun copy (Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;)Lorg/jetbrains/kotlinx/dataframe/io/Jdbc2$Options; - public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Jdbc2$Options;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Jdbc2$Options; + public final fun copy (Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;)Lorg/jetbrains/kotlinx/dataframe/io/Jdbc2$ReadOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Jdbc2$ReadOptions;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;Ljava/sql/Connection;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Jdbc2$ReadOptions; public fun equals (Ljava/lang/Object;)Z public final fun getConfigureStatement ()Lkotlin/jvm/functions/Function1; public final fun getDbType ()Lorg/jetbrains/kotlinx/dataframe/io/db/DbType; diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt index 1bc770368e..a8823956a3 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt @@ -50,7 +50,7 @@ public class Jdbc : * [DataFrameReadSource] for JDBC. * * Reading from JDBC always needs a "what" (a SQL query or table name) — unlike a file, a [Connection] doesn't - * carry that instruction. Provide it via [Options.sqlQueryOrTableName]. The only exception is [ResultSet], + * carry that instruction. Provide it via [ReadOptions.sqlQueryOrTableName]. 
The only exception is [ResultSet], * which is already an executed query. * * Supported source types: [Connection], [DataSource], [DbConnectionConfig], [ResultSet]. @@ -60,7 +60,7 @@ public class Jdbc : */ public class Jdbc2 : DataFrameReadSource { - public data class Options( + public data class ReadOptions( /** * SQL query (e.g. `"SELECT * FROM users"`) or table name (e.g. `"users"`). * Required for [Connection], [DataSource], and [DbConnectionConfig] sources. @@ -89,7 +89,7 @@ public class Jdbc2 : DataFrameReadSource { ) override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { - if (options != null && options !is Options) return false + if (options != null && options !is ReadOptions) return false return supportedReadingTypes.any { sourceInfo.kType.isSubtypeOf(it) } } @@ -99,7 +99,7 @@ public class Jdbc2 : DataFrameReadSource { options: DataFrameReadOptions?, ): Result> = runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions @Suppress("RedundantReturnKeyword") return@runCatching when (source) { is ResultSet -> when { @@ -117,7 +117,7 @@ public class Jdbc2 : DataFrameReadSource { // Without dbType or a connection we can't read a ResultSet — fall through. 
else -> return Result.failure( IllegalArgumentException( - "ResultSet read requires either Options.dbType or Options.resultSetConnection", + "ResultSet read requires either ReadOptions.dbType or ReadOptions.resultSetConnection", ), ) } @@ -125,7 +125,7 @@ public class Jdbc2 : DataFrameReadSource { is Connection -> { val query = opts.sqlQueryOrTableName ?: return Result.failure( - IllegalArgumentException("Connection read requires Options.sqlQueryOrTableName"), + IllegalArgumentException("Connection read requires ReadOptions.sqlQueryOrTableName"), ) source.readDataFrame( sqlQueryOrTableName = query, @@ -140,7 +140,7 @@ public class Jdbc2 : DataFrameReadSource { is DataSource -> { val query = opts.sqlQueryOrTableName ?: return Result.failure( - IllegalArgumentException("DataSource read requires Options.sqlQueryOrTableName"), + IllegalArgumentException("DataSource read requires ReadOptions.sqlQueryOrTableName"), ) source.readDataFrame( sqlQueryOrTableName = query, @@ -155,7 +155,7 @@ public class Jdbc2 : DataFrameReadSource { is DbConnectionConfig -> { val query = opts.sqlQueryOrTableName ?: return Result.failure( - IllegalArgumentException("DbConnectionConfig read requires Options.sqlQueryOrTableName"), + IllegalArgumentException("DbConnectionConfig read requires ReadOptions.sqlQueryOrTableName"), ) source.readDataFrame( sqlQueryOrTableName = query, @@ -177,7 +177,7 @@ public class Jdbc2 : DataFrameReadSource { options: DataFrameReadOptions?, ): Result = runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions when (source) { // ResultSet has a true zero-row metadata-only path. 
is ResultSet -> when { @@ -187,24 +187,24 @@ public class Jdbc2 : DataFrameReadSource { opts.resultSetConnection != null -> DataFrameSchema.readResultSet(source, extractDBTypeFromConnection(opts.resultSetConnection)) - else -> error("ResultSet schema read requires either Options.dbType or Options.resultSetConnection") + else -> error("ResultSet schema read requires either ReadOptions.dbType or ReadOptions.resultSetConnection") } is Connection -> { val query = opts.sqlQueryOrTableName - ?: error("Connection schema read requires Options.sqlQueryOrTableName") + ?: error("Connection schema read requires ReadOptions.sqlQueryOrTableName") source.readDataFrameSchema(sqlQueryOrTableName = query, dbType = opts.dbType) } is DataSource -> { val query = opts.sqlQueryOrTableName - ?: error("DataSource schema read requires Options.sqlQueryOrTableName") + ?: error("DataSource schema read requires ReadOptions.sqlQueryOrTableName") source.readDataFrameSchema(sqlQueryOrTableName = query, dbType = opts.dbType) } is DbConnectionConfig -> { val query = opts.sqlQueryOrTableName - ?: error("DbConnectionConfig schema read requires Options.sqlQueryOrTableName") + ?: error("DbConnectionConfig schema read requires ReadOptions.sqlQueryOrTableName") source.readDataFrameSchema(sqlQueryOrTableName = query, dbType = opts.dbType) } diff --git a/dataframe-json/api/dataframe-json.api b/dataframe-json/api/dataframe-json.api index 46885ef8e5..0f04740b98 100644 --- a/dataframe-json/api/dataframe-json.api +++ b/dataframe-json/api/dataframe-json.api @@ -55,7 +55,7 @@ public final class org/jetbrains/kotlinx/dataframe/io/Json : org/jetbrains/kotli public final class org/jetbrains/kotlinx/dataframe/io/Json$Companion { } -public final class org/jetbrains/kotlinx/dataframe/io/Json$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +public final class org/jetbrains/kotlinx/dataframe/io/Json$ReadOptions : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { public fun ()V public fun 
(Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;Z)V public synthetic fun (Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V @@ -63,8 +63,8 @@ public final class org/jetbrains/kotlinx/dataframe/io/Json$Options : org/jetbrai public final fun component2 ()Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; public final fun component3 ()Ljava/util/List; public final fun component4 ()Z - public final fun copy (Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;Z)Lorg/jetbrains/kotlinx/dataframe/io/Json$Options; - public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Json$Options;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Json$Options; + public final fun copy (Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;Z)Lorg/jetbrains/kotlinx/dataframe/io/Json$ReadOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/Json$ReadOptions;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/Json$ReadOptions; public fun equals (Ljava/lang/Object;)Z public final fun getHeader ()Ljava/util/List; public final fun getKeyValuePaths ()Ljava/util/List; diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 3d96e4fd4f..3728d0f99e 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -45,11 +45,25 @@ public class Json : DataFrameWriteTarget { public data class ReadOptions( - val header: List = emptyList(), - 
val typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - val keyValuePaths: List = emptyList(), - val unifyNumbers: Boolean = true, - ) : DataFrameReadOptions + val header: List, + val typeClashTactic: TypeClashTactic, + val keyValuePaths: List, + val unifyNumbers: Boolean, + ) : DataFrameReadOptions { + public companion object { + public operator fun invoke( + header: List = emptyList(), + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, + keyValuePaths: List = emptyList(), + unifyNumbers: Boolean = true, + ): ReadOptions = ReadOptions( + header = header, + typeClashTactic = typeClashTactic, + keyValuePaths = keyValuePaths, + unifyNumbers = unifyNumbers + ) + } + } override val supportedReadingTypes: Set = setOf( @@ -270,6 +284,12 @@ public class Json : } } +public val DataFrameReadOptions.Companion.Json: org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions.Companion + +public val DataFrameWriteOptions.Companion.Json: org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions.Companion + private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) private inline fun KType.isSuperTypeOf(): Boolean = this.isSupertypeOf(typeOf()) diff --git a/dataframe-openapi-generator/api/dataframe-openapi-generator.api b/dataframe-openapi-generator/api/dataframe-openapi-generator.api index 76583b5641..6ad417358a 100644 --- a/dataframe-openapi-generator/api/dataframe-openapi-generator.api +++ b/dataframe-openapi-generator/api/dataframe-openapi-generator.api @@ -34,7 +34,7 @@ public final class org/jetbrains/kotlinx/dataframe/io/OpenApi2 : org/jetbrains/k public final class org/jetbrains/kotlinx/dataframe/io/OpenApi2$Companion { } -public final class org/jetbrains/kotlinx/dataframe/io/OpenApi2$Options : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { +public final class 
org/jetbrains/kotlinx/dataframe/io/OpenApi2$ReadOptions : org/jetbrains/kotlinx/dataframe/io/DataFrameReadOptions { public fun ()V public fun (Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;)V public synthetic fun (Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;ILkotlin/jvm/internal/DefaultConstructorMarker;)V @@ -43,8 +43,8 @@ public final class org/jetbrains/kotlinx/dataframe/io/OpenApi2$Options : org/jet public final fun component3 ()Z public final fun component4 ()Z public final fun component5 ()Lorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility; - public final fun copy (Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;)Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$Options; - public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$Options;Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$Options; + public final fun copy (Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;)Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$ReadOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$ReadOptions;Ljava/util/List;Lio/swagger/v3/parser/core/models/ParseOptions;ZZLorg/jetbrains/kotlinx/dataframe/codeGen/MarkerVisibility;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/OpenApi2$ReadOptions; public fun equals (Ljava/lang/Object;)Z public final fun getAuth ()Ljava/util/List; public final fun getExtensionProperties ()Z diff --git a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt 
b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt index 1f6363a22b..56733ff491 100644 --- a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt +++ b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt @@ -32,7 +32,7 @@ import kotlin.reflect.typeOf */ public class OpenApi2 : DataFrameReadSource { - public data class Options( + public data class ReadOptions( val auth: List? = null, val parseOptions: ParseOptions? = null, val extensionProperties: Boolean = false, @@ -62,7 +62,7 @@ public class OpenApi2 : DataFrameReadSource { } override fun acceptsSource(sourceInfo: DataSourceInfo, options: DataFrameReadOptions?): Boolean { - if (options != null && options !is Options) return false + if (options != null && options !is ReadOptions) return false val ext = sourceInfo.extension?.lowercase() if (ext != null && ext !in EXTENSIONS) return false if (sourceInfo.mimeType != null && sourceInfo.mimeType !in MIME_TYPES) return false @@ -92,7 +92,7 @@ public class OpenApi2 : DataFrameReadSource { options: DataFrameReadOptions?, ): Result = runCatching { - val opts = (options ?: Options()) as Options + val opts = (options ?: ReadOptions()) as ReadOptions val kType = sourceInfo.kType // Resolve to OpenAPI-spec text, returning null if the content isn't OpenAPI. 
From d0416244498e3db0b9eaaf860aae667cd4fa36a7 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 28 May 2026 16:37:44 +0200 Subject: [PATCH 20/20] added shortcuts for read/write options for better discoverability --- .../jetbrains/kotlinx/dataframe/io/guess2.kt | 8 ++- .../jetbrains/kotlinx/dataframe/io/Guess2.kt | 6 +- .../kotlinx/dataframe/io/arrowReading.kt | 55 +++++++++++++-- .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 68 +++++++++++++++---- .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 68 +++++++++++++++---- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 44 +++++++++--- .../jetbrains/kotlinx/dataframe/io/Jdbc.kt | 40 ++++++++--- .../jetbrains/kotlinx/dataframe/io/json.kt | 24 ++++--- .../jetbrains/kotlinx/dataframe/io/OpenApi.kt | 32 +++++++-- 9 files changed, 271 insertions(+), 74 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt index 844c65f450..161a98fab6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess2.kt @@ -38,7 +38,9 @@ public sealed interface DataFrameIO { public val testOrder: Int } -public interface DataFrameReadOptions +public interface DataFrameReadOptions { + public companion object; +} public interface DataFrameReadSource : DataFrameIO { /** @@ -88,7 +90,9 @@ internal typealias DataFrameReadSourceFunction = options: DataFrameReadOptions?, ) -> Result -public interface DataFrameWriteOptions +public interface DataFrameWriteOptions { + public companion object; +} public interface DataFrameWriteTarget : DataFrameIO { public val supportedWritingTypes: Set diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt index 2e2efecaf1..0c8927c139 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt +++ 
b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/Guess2.kt @@ -67,7 +67,7 @@ class Guess2 { DataFrame.readSource(file.inputStream()) shouldBe expected DataFrame.readSource(Json.decodeFromString(file.readText())) shouldBe expected - val readOptions = org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions( + val readOptions = DataFrameReadOptions.Json( typeClashTactic = JSON.TypeClashTactic.ANY_COLUMNS, ) @@ -123,7 +123,7 @@ class Guess2 { Path(tsvFile.path).absolute().normalize().toUri().toURL(), ) shouldBe expected - val options = Tsv.ReadOptions(delimiter = '\t') + val options = DataFrameReadOptions.Tsv(delimiter = '\t') DataFrame.readSource(tsvFile.path, options) shouldBe expected DataFrame.readSource(Path(tsvFile.path), options) shouldBe expected @@ -192,7 +192,7 @@ class Guess2 { DataFrame.readSource(wb.getSheetAt(0)) shouldBe expected } - val options = ExcelNEW.ReadOptions() + val options = DataFrameReadOptions.Excel() // Binary streams have no extension and are accepted by every format, // so options are needed to pin ExcelNEW for the InputStream variant. 
diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt index 2902a29a25..5dfe90ad21 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt @@ -51,7 +51,16 @@ public class ArrowFeather : SupportedDataFrameFormat { */ public class ArrowFeatherNEW : DataFrameReadSource { - public data class ReadOptions(val nullability: NullabilityOptions = NullabilityOptions.Infer) : DataFrameReadOptions + public data class ReadOptions(val nullability: NullabilityOptions) : DataFrameReadOptions { + public companion object { + public operator fun invoke( + nullability: NullabilityOptions = NullabilityOptions.Infer, + ): ReadOptions = + ReadOptions( + nullability = nullability, + ) + } + } override val supportedReadingTypes: Set = setOf( @@ -121,6 +130,10 @@ public class ArrowFeatherNEW : DataFrameReadSource { override fun toString(): String = "ArrowFeather" } +public val DataFrameReadOptions.Companion.ArrowFeather: + org.jetbrains.kotlinx.dataframe.io.ArrowFeatherNEW.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.ArrowFeatherNEW.ReadOptions.Companion + /** * [DataFrameReadSource] for [Arrow IPC streaming files][DataFrame.readArrowIPC]. 
* @@ -137,9 +150,20 @@ public class ArrowFeatherNEW : DataFrameReadSource { public class ArrowIPC : DataFrameReadSource { public data class ReadOptions( - val allocator: RootAllocator = Allocator.ROOT, - val nullability: NullabilityOptions = NullabilityOptions.Infer, - ) : DataFrameReadOptions + val allocator: RootAllocator, + val nullability: NullabilityOptions, + ) : DataFrameReadOptions { + public companion object { + public operator fun invoke( + allocator: RootAllocator = Allocator.ROOT, + nullability: NullabilityOptions = NullabilityOptions.Infer, + ): ReadOptions = + ReadOptions( + allocator = allocator, + nullability = nullability, + ) + } + } override val supportedReadingTypes: Set = setOf( @@ -211,6 +235,9 @@ public class ArrowIPC : DataFrameReadSource { override fun toString(): String = "ArrowIPC" } +public val DataFrameReadOptions.Companion.ArrowIPC: org.jetbrains.kotlinx.dataframe.io.ArrowIPC.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.ArrowIPC.ReadOptions.Companion + /** * [DataFrameReadSource] for Apache Parquet files (read via Arrow Dataset). 
* @@ -223,9 +250,20 @@ public class ArrowIPC : DataFrameReadSource { public class Parquet : DataFrameReadSource { public data class ReadOptions( - val nullability: NullabilityOptions = NullabilityOptions.Infer, - val batchSize: Long = ARROW_PARQUET_DEFAULT_BATCH_SIZE, - ) : DataFrameReadOptions + val nullability: NullabilityOptions, + val batchSize: Long, + ) : DataFrameReadOptions { + public companion object { + public operator fun invoke( + nullability: NullabilityOptions = NullabilityOptions.Infer, + batchSize: Long = ARROW_PARQUET_DEFAULT_BATCH_SIZE, + ): ReadOptions = + ReadOptions( + nullability = nullability, + batchSize = batchSize, + ) + } + } override val supportedReadingTypes: Set = setOf(typeOf(), typeOf(), typeOf()) @@ -288,6 +326,9 @@ public class Parquet : DataFrameReadSource { override fun toString(): String = "Parquet" } +public val DataFrameReadOptions.Companion.Parquet: org.jetbrains.kotlinx.dataframe.io.Parquet.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.Parquet.ReadOptions.Companion + private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) private const val READ_ARROW_FEATHER = "readArrowFeather" diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 1dc1250c4f..470062731c 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -41,21 +41,56 @@ public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITE public class Csv : DataFrameReadSource { public data class ReadOptions( - val delimiter: Char = DelimParams.CSV_DELIMITER, - val header: List = DelimParams.HEADER, - val charset: Charset? = DelimParams.CHARSET, - val colTypes: Map = DelimParams.COL_TYPES, - val skipLines: Long = DelimParams.SKIP_LINES, - val readLines: Long? 
= DelimParams.READ_LINES, - val parserOptions: ParserOptions? = DelimParams.PARSER_OPTIONS, - val ignoreEmptyLines: Boolean = DelimParams.IGNORE_EMPTY_LINES, - val allowMissingColumns: Boolean = DelimParams.ALLOW_MISSING_COLUMNS, - val ignoreExcessColumns: Boolean = DelimParams.IGNORE_EXCESS_COLUMNS, - val quote: Char = DelimParams.QUOTE, - val ignoreSurroundingSpaces: Boolean = DelimParams.IGNORE_SURROUNDING_SPACES, - val trimInsideQuoted: Boolean = DelimParams.TRIM_INSIDE_QUOTED, - val parseParallel: Boolean = DelimParams.PARSE_PARALLEL, - ) : DataFrameReadOptions + val delimiter: Char, + val header: List, + val charset: Charset?, + val colTypes: Map, + val skipLines: Long, + val readLines: Long?, + val parserOptions: ParserOptions?, + val ignoreEmptyLines: Boolean, + val allowMissingColumns: Boolean, + val ignoreExcessColumns: Boolean, + val quote: Char, + val ignoreSurroundingSpaces: Boolean, + val trimInsideQuoted: Boolean, + val parseParallel: Boolean, + ) : DataFrameReadOptions { + public companion object { + public operator fun invoke( + delimiter: Char = DelimParams.CSV_DELIMITER, + header: List = DelimParams.HEADER, + charset: Charset? = DelimParams.CHARSET, + colTypes: Map = DelimParams.COL_TYPES, + skipLines: Long = DelimParams.SKIP_LINES, + readLines: Long? = DelimParams.READ_LINES, + parserOptions: ParserOptions? 
= DelimParams.PARSER_OPTIONS, + ignoreEmptyLines: Boolean = DelimParams.IGNORE_EMPTY_LINES, + allowMissingColumns: Boolean = DelimParams.ALLOW_MISSING_COLUMNS, + ignoreExcessColumns: Boolean = DelimParams.IGNORE_EXCESS_COLUMNS, + quote: Char = DelimParams.QUOTE, + ignoreSurroundingSpaces: Boolean = DelimParams.IGNORE_SURROUNDING_SPACES, + trimInsideQuoted: Boolean = DelimParams.TRIM_INSIDE_QUOTED, + parseParallel: Boolean = DelimParams.PARSE_PARALLEL, + ): ReadOptions = + ReadOptions( + delimiter = delimiter, + header = header, + charset = charset, + colTypes = colTypes, + skipLines = skipLines, + readLines = readLines, + parserOptions = parserOptions, + ignoreEmptyLines = ignoreEmptyLines, + allowMissingColumns = allowMissingColumns, + ignoreExcessColumns = ignoreExcessColumns, + quote = quote, + ignoreSurroundingSpaces = ignoreSurroundingSpaces, + trimInsideQuoted = trimInsideQuoted, + parseParallel = parseParallel, + ) + } + } override val supportedReadingTypes: Set = setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) @@ -168,6 +203,9 @@ public class Csv : DataFrameReadSource { override fun toString(): String = "Csv" } +public val DataFrameReadOptions.Companion.Csv: org.jetbrains.kotlinx.dataframe.io.Csv.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.Csv.ReadOptions.Companion + private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) private const val READ_CSV = "readCsv" diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index 567ec78f66..ab34461559 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -41,21 +41,56 @@ public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITE public class Tsv : DataFrameReadSource { public data class ReadOptions( - val delimiter: Char = 
DelimParams.TSV_DELIMITER, - val header: List = DelimParams.HEADER, - val charset: Charset? = DelimParams.CHARSET, - val colTypes: Map = DelimParams.COL_TYPES, - val skipLines: Long = DelimParams.SKIP_LINES, - val readLines: Long? = DelimParams.READ_LINES, - val parserOptions: ParserOptions? = DelimParams.PARSER_OPTIONS, - val ignoreEmptyLines: Boolean = DelimParams.IGNORE_EMPTY_LINES, - val allowMissingColumns: Boolean = DelimParams.ALLOW_MISSING_COLUMNS, - val ignoreExcessColumns: Boolean = DelimParams.IGNORE_EXCESS_COLUMNS, - val quote: Char = DelimParams.QUOTE, - val ignoreSurroundingSpaces: Boolean = DelimParams.IGNORE_SURROUNDING_SPACES, - val trimInsideQuoted: Boolean = DelimParams.TRIM_INSIDE_QUOTED, - val parseParallel: Boolean = DelimParams.PARSE_PARALLEL, - ) : DataFrameReadOptions + val delimiter: Char, + val header: List, + val charset: Charset?, + val colTypes: Map, + val skipLines: Long, + val readLines: Long?, + val parserOptions: ParserOptions?, + val ignoreEmptyLines: Boolean, + val allowMissingColumns: Boolean, + val ignoreExcessColumns: Boolean, + val quote: Char, + val ignoreSurroundingSpaces: Boolean, + val trimInsideQuoted: Boolean, + val parseParallel: Boolean, + ) : DataFrameReadOptions { + public companion object { + public operator fun invoke( + delimiter: Char = DelimParams.TSV_DELIMITER, + header: List = DelimParams.HEADER, + charset: Charset? = DelimParams.CHARSET, + colTypes: Map = DelimParams.COL_TYPES, + skipLines: Long = DelimParams.SKIP_LINES, + readLines: Long? = DelimParams.READ_LINES, + parserOptions: ParserOptions? 
= DelimParams.PARSER_OPTIONS, + ignoreEmptyLines: Boolean = DelimParams.IGNORE_EMPTY_LINES, + allowMissingColumns: Boolean = DelimParams.ALLOW_MISSING_COLUMNS, + ignoreExcessColumns: Boolean = DelimParams.IGNORE_EXCESS_COLUMNS, + quote: Char = DelimParams.QUOTE, + ignoreSurroundingSpaces: Boolean = DelimParams.IGNORE_SURROUNDING_SPACES, + trimInsideQuoted: Boolean = DelimParams.TRIM_INSIDE_QUOTED, + parseParallel: Boolean = DelimParams.PARSE_PARALLEL, + ): ReadOptions = + ReadOptions( + delimiter = delimiter, + header = header, + charset = charset, + colTypes = colTypes, + skipLines = skipLines, + readLines = readLines, + parserOptions = parserOptions, + ignoreEmptyLines = ignoreEmptyLines, + allowMissingColumns = allowMissingColumns, + ignoreExcessColumns = ignoreExcessColumns, + quote = quote, + ignoreSurroundingSpaces = ignoreSurroundingSpaces, + trimInsideQuoted = trimInsideQuoted, + parseParallel = parseParallel, + ) + } + } override val supportedReadingTypes: Set = setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) @@ -167,6 +202,9 @@ public class Tsv : DataFrameReadSource { override fun toString(): String = "Tsv" } +public val DataFrameReadOptions.Companion.Tsv: org.jetbrains.kotlinx.dataframe.io.Tsv.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.Tsv.ReadOptions.Companion + private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) private const val READ_TSV = "readTsv" diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index f0c27dd1a6..87f0a182f5 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -70,15 +70,38 @@ public class Excel : SupportedDataFrameFormat { public class ExcelNEW : DataFrameReadSource { public data class ReadOptions( - val sheetName: String? 
= null, - val skipRows: Int = 0, - val columns: String? = null, - val stringColumns: StringColumns? = null, - val rowsCount: Int? = null, - val nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE, - val firstRowIsHeader: Boolean = true, - val parseEmptyAsNull: Boolean = true, - ) : DataFrameReadOptions + val sheetName: String?, + val skipRows: Int, + val columns: String?, + val stringColumns: StringColumns?, + val rowsCount: Int?, + val nameRepairStrategy: NameRepairStrategy, + val firstRowIsHeader: Boolean, + val parseEmptyAsNull: Boolean, + ) : DataFrameReadOptions { + public companion object { + public operator fun invoke( + sheetName: String? = null, + skipRows: Int = 0, + columns: String? = null, + stringColumns: StringColumns? = null, + rowsCount: Int? = null, + nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE, + firstRowIsHeader: Boolean = true, + parseEmptyAsNull: Boolean = true, + ): ReadOptions = + ReadOptions( + sheetName = sheetName, + skipRows = skipRows, + columns = columns, + stringColumns = stringColumns, + rowsCount = rowsCount, + nameRepairStrategy = nameRepairStrategy, + firstRowIsHeader = firstRowIsHeader, + parseEmptyAsNull = parseEmptyAsNull, + ) + } + } // String reference paths are normalized to URL by readSourceImpl, so no String entry here; // Excel is binary, so raw String content isn't a meaningful input either. @@ -193,6 +216,9 @@ public class ExcelNEW : DataFrameReadSource { override fun toString(): String = "Xlsx" } +public val DataFrameReadOptions.Companion.Excel: org.jetbrains.kotlinx.dataframe.io.ExcelNEW.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.ExcelNEW.ReadOptions.Companion + private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) private const val MESSAGE_REMOVE_1_1 = "Will be removed in 1.1." 
diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt index a8823956a3..a670e8b8a6 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/Jdbc.kt @@ -66,19 +66,40 @@ public class Jdbc2 : DataFrameReadSource { * Required for [Connection], [DataSource], and [DbConnectionConfig] sources. * Ignored for [ResultSet] (it's already an executed query). */ - val sqlQueryOrTableName: String? = null, - val limit: Int? = null, - val inferNullability: Boolean = true, + val sqlQueryOrTableName: String?, + val limit: Int?, + val inferNullability: Boolean, /** Optional, auto-detected from the source when `null`. */ - val dbType: DbType? = null, - val strictValidation: Boolean = true, - val configureStatement: (PreparedStatement) -> Unit = {}, + val dbType: DbType?, + val strictValidation: Boolean, + val configureStatement: (PreparedStatement) -> Unit, /** * Only used when the source is a [ResultSet] and [dbType] is `null`; provides a [Connection] * to auto-detect the database type. Ignored otherwise. */ - val resultSetConnection: Connection? = null, - ) : DataFrameReadOptions + val resultSetConnection: Connection?, + ) : DataFrameReadOptions { + public companion object { + public operator fun invoke( + sqlQueryOrTableName: String? = null, + limit: Int? = null, + inferNullability: Boolean = true, + dbType: DbType? = null, + strictValidation: Boolean = true, + configureStatement: (PreparedStatement) -> Unit = {}, + resultSetConnection: Connection? 
= null, + ): ReadOptions = + ReadOptions( + sqlQueryOrTableName = sqlQueryOrTableName, + limit = limit, + inferNullability = inferNullability, + dbType = dbType, + strictValidation = strictValidation, + configureStatement = configureStatement, + resultSetConnection = resultSetConnection, + ) + } + } override val supportedReadingTypes: Set = setOf( @@ -217,6 +238,9 @@ public class Jdbc2 : DataFrameReadSource { override fun toString(): String = "Jdbc" } +public val DataFrameReadOptions.Companion.Jdbc: org.jetbrains.kotlinx.dataframe.io.Jdbc2.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.Jdbc2.ReadOptions.Companion + private fun DataFrame.Companion.readJDBC(stream: File): DataFrame<*> { TODO("Not yet implemented") } diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 3728d0f99e..15f9ba1d69 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -56,12 +56,13 @@ public class Json : typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, keyValuePaths: List = emptyList(), unifyNumbers: Boolean = true, - ): ReadOptions = ReadOptions( - header = header, - typeClashTactic = typeClashTactic, - keyValuePaths = keyValuePaths, - unifyNumbers = unifyNumbers - ) + ): ReadOptions = + ReadOptions( + header = header, + typeClashTactic = typeClashTactic, + keyValuePaths = keyValuePaths, + unifyNumbers = unifyNumbers, + ) } } @@ -75,7 +76,12 @@ public class Json : typeOf(), ) - public data class WriteOptions(val prettyPrint: Boolean = false) : DataFrameWriteOptions + public data class WriteOptions(val prettyPrint: Boolean) : DataFrameWriteOptions { + public companion object { + public operator fun invoke(prettyPrint: Boolean = false): WriteOptions = + WriteOptions(prettyPrint = prettyPrint) + } + } override val 
supportedWritingTypes: Set = setOf( @@ -287,8 +293,8 @@ public class Json : public val DataFrameReadOptions.Companion.Json: org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions.Companion get() = org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions.Companion -public val DataFrameWriteOptions.Companion.Json: org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions.Companion - get() = org.jetbrains.kotlinx.dataframe.io.Json.ReadOptions.Companion +public val DataFrameWriteOptions.Companion.Json: org.jetbrains.kotlinx.dataframe.io.Json.WriteOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.Json.WriteOptions.Companion private inline fun KType.isSubTypeOf(): Boolean = this.isSubtypeOf(typeOf()) diff --git a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt index 56733ff491..c5069409a3 100644 --- a/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt +++ b/dataframe-openapi-generator/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/OpenApi.kt @@ -33,12 +33,29 @@ import kotlin.reflect.typeOf public class OpenApi2 : DataFrameReadSource { public data class ReadOptions( - val auth: List? = null, - val parseOptions: ParseOptions? = null, - val extensionProperties: Boolean = false, - val generateHelperCompanionObject: Boolean = false, - val visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC, - ) : DataFrameReadOptions + val auth: List?, + val parseOptions: ParseOptions?, + val extensionProperties: Boolean, + val generateHelperCompanionObject: Boolean, + val visibility: MarkerVisibility, + ) : DataFrameReadOptions { + public companion object { + public operator fun invoke( + auth: List? = null, + parseOptions: ParseOptions? 
= null, + extensionProperties: Boolean = false, + generateHelperCompanionObject: Boolean = false, + visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC, + ): ReadOptions = + ReadOptions( + auth = auth, + parseOptions = parseOptions, + extensionProperties = extensionProperties, + generateHelperCompanionObject = generateHelperCompanionObject, + visibility = visibility, + ) + } + } override val supportedReadingTypes: Set = setOf(typeOf(), typeOf(), typeOf(), typeOf(), typeOf()) @@ -155,6 +172,9 @@ public class OpenApi2 : DataFrameReadSource { override fun toString(): String = "OpenApi" } +public val DataFrameReadOptions.Companion.OpenApi: org.jetbrains.kotlinx.dataframe.io.OpenApi2.ReadOptions.Companion + get() = org.jetbrains.kotlinx.dataframe.io.OpenApi2.ReadOptions.Companion + /** * Allows for OpenApi type schemas to be converted to [DataSchema] interfaces. */