Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f2bb001
include sql configs from html file
nchammas Feb 4, 2020
f0137f6
generate html table of sql configs
nchammas Feb 4, 2020
a6045f7
example of follow-up that is needed to clean docstrings
nchammas Feb 4, 2020
7742fc1
back to a plain list
nchammas Feb 4, 2020
a3b6d19
style
nchammas Feb 4, 2020
886d01e
python style
nchammas Feb 4, 2020
b80e6f2
catch missed special default values
nchammas Feb 4, 2020
914630f
fix reference
nchammas Feb 4, 2020
a8ad179
tweak create-docs info message
nchammas Feb 5, 2020
e09f890
use markdown via mkdocs
nchammas Feb 5, 2020
851d557
explain why .toArray
nchammas Feb 5, 2020
36fd916
use markdown from within mkdocs
nchammas Feb 5, 2020
6ca51cd
fix config reference + undo ConfigEntry removal
nchammas Feb 5, 2020
43e47bd
explain why weird import
nchammas Feb 6, 2020
1c9aa71
show example html output
nchammas Feb 6, 2020
65c5bb0
unnecessary comment
nchammas Feb 6, 2020
79022e7
docstring -> description
nchammas Feb 6, 2020
aa63cbe
split up sql docs script: API + Config
nchammas Feb 6, 2020
ba8ae9f
move sql-configs.html to root of docs/
nchammas Feb 6, 2020
5310f08
fix documentation around create-docs.sh
nchammas Feb 6, 2020
61b4ac5
unnecessary whitespace
nchammas Feb 6, 2020
679bdac
group imports at beginning
nchammas Feb 6, 2020
8401b6a
check for private configs
nchammas Feb 6, 2020
e48eb34
tweak default formatting
nchammas Feb 7, 2020
3292108
remove check for private configs
nchammas Feb 7, 2020
617a69d
tweak info message from create-docs.sh
nchammas Feb 7, 2020
452bf98
remove dup default
nchammas Feb 7, 2020
4c32cf2
stringify certain defaults so they display right
nchammas Feb 7, 2020
b08bac4
300 -> 5 * 60
nchammas Feb 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ Note: Other versions of roxygen2 might work in SparkR documentation generation b
To generate API docs for any language, you'll need to install these libraries:

```sh
$ sudo pip install sphinx mkdocs numpy
$ sudo pip install sphinx mkdocs numpy markdown
```

## Generating the Documentation HTML
Expand Down
1 change: 1 addition & 0 deletions docs/_includes/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sql-configs.html
41 changes: 2 additions & 39 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -2399,47 +2399,10 @@ the driver or executor, or, in the absence of that value, the number of cores av
Please refer to the [Security](security.html) page for available options on how to secure different
Spark subsystems.

### Spark SQL

Running the <code>SET -v</code> command will show the entire list of SQL configuration properties.

<div class="codetabs">
<div data-lang="scala" markdown="1">

{% highlight scala %}
// spark is an existing SparkSession
spark.sql("SET -v").show(numRows = 200, truncate = false)
{% endhighlight %}

</div>

<div data-lang="java" markdown="1">

{% highlight java %}
// spark is an existing SparkSession
spark.sql("SET -v").show(200, false);
{% endhighlight %}
</div>

<div data-lang="python" markdown="1">

{% highlight python %}
# spark is an existing SparkSession
spark.sql("SET -v").show(n=200, truncate=False)
{% endhighlight %}

</div>

<div data-lang="r" markdown="1">

{% highlight r %}
sparkR.session()
properties <- sql("SET -v")
showDF(properties, numRows = 200, truncate = FALSE)
{% endhighlight %}
### Spark SQL

</div>
</div>
{% include sql-configs.html %}


### Spark Streaming
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1152,7 +1152,7 @@ object SQLConf {

// Boolean SQL config entry keyed by `spark.sql.variable.substitute`, created with a default of true.
// NOTE(review): two `.doc(...)` calls are chained below; they look like the before/after wording of
// the same description (plain vs. backtick-quoted `${...}` syntax) -- confirm only one is intended.
val VARIABLE_SUBSTITUTE_ENABLED =
buildConf("spark.sql.variable.substitute")
.doc("This enables substitution using syntax like ${var} ${system:var} and ${env:var}.")
.doc("This enables substitution using syntax like `${var}`, `${system:var}`, and `${env:var}`.")
.booleanConf
.createWithDefault(true)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.execution.{ExplainMode, QueryExecution}
import org.apache.spark.sql.execution.arrow.ArrowConverters
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.DataType

private[sql] object PythonSQLUtils {
Expand All @@ -39,6 +40,11 @@ private[sql] object PythonSQLUtils {
FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
}

/**
 * Returns everything reported by `getAllDefinedConfs` on a freshly constructed [[SQLConf]],
 * materialized as an array of string triples. The Python docs generator reads each triple
 * positionally as (name, default, description).
 */
def listSQLConfigs(): Array[(String, String, String)] =
  new SQLConf().getAllDefinedConfs.toArray

/**
* Python callable function to read a file in Arrow stream format and create a [[RDD]]
* using each serialized ArrowRecordBatch as a partition.
Expand Down
82 changes: 80 additions & 2 deletions sql/gen-sql-markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,18 @@
# limitations under the License.
#

import sys
import os
import re
import sys
from collections import namedtuple
from textwrap import dedent

from markdown import markdown

# Record describing one SQL function as reported by the JVM side.
ExpressionInfo = namedtuple(
    "ExpressionInfo",
    [
        "className",
        "name",
        "usage",
        "arguments",
        "examples",
        "note",
        "since",
        "deprecated",
    ],
)

# Record describing one SQL configuration option: its key, default value and description.
SQLConfEntry = namedtuple(
    "SQLConfEntry",
    [
        "name",
        "default",
        "docstring",
    ],
)


def _list_function_infos(jvm):
Expand All @@ -47,6 +53,18 @@ def _list_function_infos(jvm):
return sorted(infos, key=lambda i: i.name)


def _list_sql_configs(jvm):
    """
    Returns a list of `SQLConfEntry` records, one per entry returned by
    `PythonSQLUtils.listSQLConfigs()` on the given `jvm`, in the order the JVM
    returns them.
    """
    python_sql_utils = jvm.org.apache.spark.sql.api.python.PythonSQLUtils
    entries = []
    for jvm_entry in python_sql_utils.listSQLConfigs():
        # Each JVM entry is a 3-tuple; unpack it through its positional accessors.
        entries.append(
            SQLConfEntry(
                name=jvm_entry._1(),
                default=jvm_entry._2(),
                docstring=jvm_entry._3(),
            )
        )
    return entries


def _make_pretty_usage(usage):
"""
Makes the usage description pretty and returns a formatted string if `usage`
Expand Down Expand Up @@ -218,9 +236,69 @@ def generate_sql_markdown(jvm, path):
mdfile.write("<br/>\n\n")


def generate_sql_configs_table(jvm, path):
    """
    Generates an HTML table at `path` that lists all public SQL
    configuration options.

    One row is written per config, sorted by config name, with three cells:
    the name, the default, and the description rendered from Markdown to HTML.

    Defaults are rewritten for display as follows:
      - "<undefined>" becomes "none";
      - "<value of some.config>" becomes "value of <code>some.config</code>";
      - any other default starting with "<" raises an exception, since it would
        otherwise be emitted as if it were an HTML tag;
      - everything else is written unchanged.

    `jvm` is the JVM view handed to `_list_sql_configs`. The file at `path` is
    opened in write mode, so existing content is replaced.
    """
    sql_configs = _list_sql_configs(jvm)
    value_reference_pattern = re.compile(r"^<value of (\S*)>$")
    # ConfigEntry(key=spark.buffer.size, defaultValue=65536, doc=, public=true)
    config_entry_pattern = re.compile(
        r"ConfigEntry\(key=(\S*), defaultValue=\S*, doc=\S*, public=\S*\)")

    # Dedent the row template once, before substitution, so that multi-line
    # rendered descriptions cannot interfere with the dedent.
    row_template = dedent(
        """
        <tr>
        <td><code>{name}</code></td>
        <td>{default}</td>
        <td>{docstring}</td>
        </tr>
        """
    )

    with open(path, 'w') as f:
        f.write(dedent(
            """
            <table class="table">
            <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
            """
        ))
        for config in sorted(sql_configs, key=lambda x: x.name):
            value_reference = value_reference_pattern.match(config.default)
            if config.default == "<undefined>":
                default = "none"
            elif value_reference:
                # Difficulties in looking this up: a) potential recursion,
                # b) references to non-SQL configs. So only name the referenced config.
                default = "value of <code>{}</code>".format(value_reference.group(1))
            elif config.default.startswith("<"):
                # Also reached by a malformed "<value of ...>" default that the
                # reference pattern does not match.
                raise Exception(
                    "Unhandled reference in SQL config docs. Config {name} "
                    "has default '{default}' that looks like an HTML tag."
                    .format(
                        name=config.name,
                        default=config.default,
                    )
                )
            else:
                default = config.default

            # Replace any embedded ConfigEntry(...) dump with just the config key.
            docstring = config_entry_pattern.sub(r"\g<1>", config.docstring)

            f.write(row_template.format(
                name=config.name,
                default=default,
                docstring=markdown(docstring),
            ))
        f.write("</table>\n")


if __name__ == "__main__":
    from pyspark.java_gateway import launch_gateway

    jvm = launch_gateway().jvm

    # This script lives in the `sql/` directory, so the repository root is two
    # levels up from the script file. Resolve the absolute path first: with a
    # relative `__file__` (e.g. when run from inside `sql/`) the two dirname()
    # calls would otherwise collapse to "" and the output paths would be
    # interpreted relative to the current working directory.
    spark_home = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    markdown_file_path = os.path.join(spark_home, "sql/docs/index.md")
    sql_configs_table_path = os.path.join(spark_home, "docs/_includes/sql-configs.html")

    generate_sql_markdown(jvm, markdown_file_path)
    generate_sql_configs_table(jvm, sql_configs_table_path)