diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceOptions.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceOptions.java index c32053580f016..83df3be747085 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceOptions.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceOptions.java @@ -17,16 +17,61 @@ package org.apache.spark.sql.sources.v2; +import java.io.IOException; import java.util.HashMap; import java.util.Locale; import java.util.Map; import java.util.Optional; +import java.util.stream.Stream; + +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.spark.annotation.InterfaceStability; /** * An immutable string-to-string map in which keys are case-insensitive. This is used to represent * data source options. + * + * Each data source implementation can define its own options and teach its users how to set them. + * Spark doesn't have any restrictions about what options a data source should or should not have. + * Instead, Spark defines some standard options that data sources can optionally adopt. It's possible + * that some options are very common and many data sources use them. However, different data + * sources may define the common options (key and meaning) differently, which is quite confusing to + * end users. + * + * The standard options defined by Spark: + *
| Option key | + *Option value | + *
|---|---|
| path | + *A path string of the data files/directories, like
+ * path1, /absolute/file2, path3/*. The path can
+ * either be relative or absolute, can point to either a file or a directory, and can contain
+ * wildcards. This option is commonly used by file-based data sources. |
+ *
| paths | + *A JSON array style paths string of the data files/directories, like
+ * ["path1", "/absolute/file2"]. The format of each path is the same as the
+ * path option, plus it should follow JSON string literal format, e.g. quotes
+ * should be escaped: pa\"th means pa"th.
+ * |
+ *
| table | + *A table name string representing the table name directly without any interpretation.
+ * For example, db.tbl means a table called db.tbl, not a table called tbl
+ * inside database db. `t*b.l` means a table literally named `t*b.l` (backticks included), not t*b.l. |
+ *
| database | + *A database name string representing the database name directly without any + * interpretation, which is very similar to the table name option. | + *