
Commit b2dd795

Merge remote-tracking branch 'upstream/master'

2 parents: 4bbe1fd + 411c04a

80 files changed: 793 additions, 469 deletions


docs/sql-programming-guide.md

Lines changed: 24 additions & 24 deletions
@@ -529,7 +529,7 @@ case class Person(name: String, age: Int)
 
 // Create an RDD of Person objects and register it as a table.
 val people = sc.textFile("examples/src/main/resources/people.txt").map(_.split(",")).map(p => Person(p(0), p(1).trim.toInt)).toDF()
-people.registerTempTable("people")
+people.createOrReplaceTempView("people")
 
 // SQL statements can be run by using the sql methods provided by sqlContext.
 val teenagers = sqlContext.sql("SELECT name, age FROM people WHERE age >= 13 AND age <= 19")
@@ -605,7 +605,7 @@ JavaRDD<Person> people = sc.textFile("examples/src/main/resources/people.txt").m
 
 // Apply a schema to an RDD of JavaBeans and register it as a table.
 DataFrame schemaPeople = sqlContext.createDataFrame(people, Person.class);
-schemaPeople.registerTempTable("people");
+schemaPeople.createOrReplaceTempView("people");
 
 // SQL can be run over RDDs that have been registered as tables.
 DataFrame teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
@@ -643,7 +643,7 @@ people = parts.map(lambda p: Row(name=p[0], age=int(p[1])))
 
 # Infer the schema, and register the DataFrame as a table.
 schemaPeople = sqlContext.createDataFrame(people)
-schemaPeople.registerTempTable("people")
+schemaPeople.createOrReplaceTempView("people")
 
 # SQL can be run over DataFrames that have been registered as a table.
 teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
@@ -703,8 +703,8 @@ val rowRDD = people.map(_.split(",")).map(p => Row(p(0), p(1).trim))
 // Apply the schema to the RDD.
 val peopleDataFrame = sqlContext.createDataFrame(rowRDD, schema)
 
-// Register the DataFrames as a table.
-peopleDataFrame.registerTempTable("people")
+// Creates a temporary view using the DataFrame.
+peopleDataFrame.createOrReplaceTempView("people")
 
 // SQL statements can be run by using the sql methods provided by sqlContext.
 val results = sqlContext.sql("SELECT name FROM people")
@@ -771,10 +771,10 @@ JavaRDD<Row> rowRDD = people.map(
 // Apply the schema to the RDD.
 DataFrame peopleDataFrame = sqlContext.createDataFrame(rowRDD, schema);
 
-// Register the DataFrame as a table.
-peopleDataFrame.registerTempTable("people");
+// Creates a temporary view using the DataFrame.
+peopleDataFrame.createOrReplaceTempView("people");
 
-// SQL can be run over RDDs that have been registered as tables.
+// SQL can be run over a temporary view created using DataFrames.
 DataFrame results = sqlContext.sql("SELECT name FROM people");
 
 // The results of SQL queries are DataFrames and support all the normal RDD operations.
@@ -824,8 +824,8 @@ schema = StructType(fields)
 # Apply the schema to the RDD.
 schemaPeople = sqlContext.createDataFrame(people, schema)
 
-# Register the DataFrame as a table.
-schemaPeople.registerTempTable("people")
+# Creates a temporary view using the DataFrame
+schemaPeople.createOrReplaceTempView("people")
 
 # SQL can be run over DataFrames that have been registered as a table.
 results = sqlContext.sql("SELECT name FROM people")
@@ -844,7 +844,7 @@ for name in names.collect():
 # Data Sources
 
 Spark SQL supports operating on a variety of data sources through the `DataFrame` interface.
-A DataFrame can be operated on as normal RDDs and can also be registered as a temporary table.
+A DataFrame can be operated on as normal RDDs and can also be used to create a temporary view.
 Registering a DataFrame as a table allows you to run SQL queries over its data. This section
 describes the general methods for loading and saving data using the Spark Data Sources and then
 goes into specific options that are available for the built-in data sources.
@@ -1072,8 +1072,8 @@ people.write.parquet("people.parquet")
 // The result of loading a Parquet file is also a DataFrame.
 val parquetFile = sqlContext.read.parquet("people.parquet")
 
-//Parquet files can also be registered as tables and then used in SQL statements.
-parquetFile.registerTempTable("parquetFile")
+// Parquet files can also be used to create a temporary view and then used in SQL statements.
+parquetFile.createOrReplaceTempView("parquetFile")
 val teenagers = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
 teenagers.map(t => "Name: " + t(0)).collect().foreach(println)
 {% endhighlight %}
@@ -1094,8 +1094,8 @@ schemaPeople.write().parquet("people.parquet");
 // The result of loading a parquet file is also a DataFrame.
 DataFrame parquetFile = sqlContext.read().parquet("people.parquet");
 
-// Parquet files can also be registered as tables and then used in SQL statements.
-parquetFile.registerTempTable("parquetFile");
+// Parquet files can also be used to create a temporary view and then used in SQL statements.
+parquetFile.createOrReplaceTempView("parquetFile");
 DataFrame teenagers = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
 List<String> teenagerNames = teenagers.javaRDD().map(new Function<Row, String>() {
   public String call(Row row) {
@@ -1120,8 +1120,8 @@ schemaPeople.write.parquet("people.parquet")
 # The result of loading a parquet file is also a DataFrame.
 parquetFile = sqlContext.read.parquet("people.parquet")
 
-# Parquet files can also be registered as tables and then used in SQL statements.
-parquetFile.registerTempTable("parquetFile");
+# Parquet files can also be used to create a temporary view and then used in SQL statements.
+parquetFile.createOrReplaceTempView("parquetFile");
 teenagers = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
 teenNames = teenagers.map(lambda p: "Name: " + p.name)
 for teenName in teenNames.collect():
@@ -1144,7 +1144,7 @@ write.parquet(schemaPeople, "people.parquet")
 # The result of loading a parquet file is also a DataFrame.
 parquetFile <- read.parquet(sqlContext, "people.parquet")
 
-# Parquet files can also be registered as tables and then used in SQL statements.
+# Parquet files can also be used to create a temporary view and then used in SQL statements.
 registerTempTable(parquetFile, "parquetFile")
 teenagers <- sql(sqlContext, "SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
 schema <- structType(structField("name", "string"))
@@ -1506,8 +1506,8 @@ people.printSchema()
 // |-- age: long (nullable = true)
 // |-- name: string (nullable = true)
 
-// Register this DataFrame as a table.
-people.registerTempTable("people")
+// Creates a temporary view using the DataFrame
+people.createOrReplaceTempView("people")
 
 // SQL statements can be run by using the sql methods provided by sqlContext.
 val teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
@@ -1544,8 +1544,8 @@ people.printSchema();
 // |-- age: long (nullable = true)
 // |-- name: string (nullable = true)
 
-// Register this DataFrame as a table.
-people.registerTempTable("people");
+// Creates a temporary view using the DataFrame
+people.createOrReplaceTempView("people");
 
 // SQL statements can be run by using the sql methods provided by sqlContext.
 DataFrame teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
@@ -1582,8 +1582,8 @@ people.printSchema()
 # |-- age: long (nullable = true)
 # |-- name: string (nullable = true)
 
-# Register this DataFrame as a table.
-people.registerTempTable("people")
+# Creates a temporary view using the DataFrame.
+people.createOrReplaceTempView("people")
 
 # SQL statements can be run by using the sql methods provided by `sqlContext`.
 teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
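
Every hunk above makes the same substitution: registerTempTable becomes createOrReplaceTempView. For quick reference, a minimal Scala sketch of the updated pattern, assembled from the guide's own snippets; it assumes a live `sc: SparkContext` and `sqlContext: SQLContext`, as the guide's examples do:

// Minimal sketch of the new API; `sc` and `sqlContext` are assumed, as in the guide.
import sqlContext.implicits._

case class Person(name: String, age: Int)

val people = sc.textFile("examples/src/main/resources/people.txt")
  .map(_.split(","))
  .map(p => Person(p(0), p(1).trim.toInt))
  .toDF()

// createOrReplaceTempView replaces any existing temporary view with the same name.
people.createOrReplaceTempView("people")

val teenagers = sqlContext.sql("SELECT name, age FROM people WHERE age >= 13 AND age <= 19")
teenagers.collect().foreach(println)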

docs/streaming-programming-guide.md

Lines changed: 6 additions & 6 deletions
@@ -1553,8 +1553,8 @@ words.foreachRDD { rdd =>
 // Convert RDD[String] to DataFrame
 val wordsDataFrame = rdd.toDF("word")
 
-// Register as table
-wordsDataFrame.registerTempTable("words")
+// Create a temporary view
+wordsDataFrame.createOrReplaceTempView("words")
 
 // Do word count on DataFrame using SQL and print it
 val wordCountsDataFrame =
@@ -1606,8 +1606,8 @@ words.foreachRDD(
 });
 DataFrame wordsDataFrame = sqlContext.createDataFrame(rowRDD, JavaRow.class);
 
-// Register as table
-wordsDataFrame.registerTempTable("words");
+// Creates a temporary view using the DataFrame
+wordsDataFrame.createOrReplaceTempView("words");
 
 // Do word count on table using SQL and print it
 DataFrame wordCountsDataFrame =
@@ -1646,8 +1646,8 @@ def process(time, rdd):
 rowRdd = rdd.map(lambda w: Row(word=w))
 wordsDataFrame = sqlContext.createDataFrame(rowRdd)
 
-# Register as table
-wordsDataFrame.registerTempTable("words")
+# Creates a temporary view using the DataFrame
+wordsDataFrame.createOrReplaceTempView("words")
 
 # Do word count on table using SQL and print it
 wordCountsDataFrame = sqlContext.sql("select word, count(*) as total from words group by word")
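
The streaming guide applies the same rename inside foreachRDD. A condensed Scala sketch of the loop above, assuming `words: DStream[String]` and an in-scope `sqlContext`, as in the guide:

words.foreachRDD { rdd =>
  import sqlContext.implicits._

  // Convert RDD[String] to DataFrame
  val wordsDataFrame = rdd.toDF("word")

  // Create (or replace) the per-batch temporary view
  wordsDataFrame.createOrReplaceTempView("words")

  // Do word count on the view using SQL and print it
  val wordCountsDataFrame =
    sqlContext.sql("select word, count(*) as total from words group by word")
  wordCountsDataFrame.show()
}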

examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java

Lines changed: 4 additions & 4 deletions
@@ -73,11 +73,11 @@ public Person call(String line) {
   }
 });
 
-// Apply a schema to an RDD of Java Beans and register it as a table.
+// Apply a schema to an RDD of Java Beans and create a temporary view
 Dataset<Row> schemaPeople = spark.createDataFrame(people, Person.class);
 schemaPeople.createOrReplaceTempView("people");
 
-// SQL can be run over RDDs that have been registered as tables.
+// SQL can be run over RDDs which backs a temporary view.
 Dataset<Row> teenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
 
 // The results of SQL queries are DataFrames and support all the normal RDD operations.
@@ -101,7 +101,7 @@ public String call(Row row) {
 // The result of loading a parquet file is also a DataFrame.
 Dataset<Row> parquetFile = spark.read().parquet("people.parquet");
 
-//Parquet files can also be registered as tables and then used in SQL statements.
+// A temporary view can be created by using Parquet files and then used in SQL statements.
 parquetFile.createOrReplaceTempView("parquetFile");
 Dataset<Row> teenagers2 =
   spark.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
@@ -130,7 +130,7 @@ public String call(Row row) {
 // |-- age: IntegerType
 // |-- name: StringType
 
-// Register this DataFrame as a table.
+// Creates a temporary view using the DataFrame
 peopleFromJsonFile.createOrReplaceTempView("people");
 
 // SQL statements can be run by using the sql methods provided by `spark`
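
For reference, the JSON-to-view pattern this file also exercises, sketched in Scala; the resource path mirrors the example's and `spark` is assumed to be a SparkSession:

// Sketch only: read JSON, expose it as a view, query it through SQL.
val peopleFromJsonFile = spark.read.json("examples/src/main/resources/people.json")
peopleFromJsonFile.createOrReplaceTempView("people")
val jsonTeenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
jsonTeenagers.collect().foreach(println)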

examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java

Lines changed: 1 addition & 1 deletion
@@ -94,7 +94,7 @@ public JavaRecord call(String word) {
 });
 Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class);
 
-// Register as table
+// Creates a temporary view using the DataFrame
 wordsDataFrame.createOrReplaceTempView("words");
 
 // Do word count on table using SQL and print it

examples/src/main/python/sql.py

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@
 # |-- age: long (nullable = true)
 # |-- name: string (nullable = true)
 
-# Register this DataFrame as a temporary table.
+# Creates a temporary view using the DataFrame.
 people.createOrReplaceTempView("people")
 
 # SQL statements can be run by using the sql methods provided by `spark`

examples/src/main/python/streaming/sql_network_wordcount.py

Lines changed: 1 addition & 1 deletion
@@ -70,7 +70,7 @@ def process(time, rdd):
 rowRdd = rdd.map(lambda w: Row(word=w))
 wordsDataFrame = spark.createDataFrame(rowRdd)
 
-# Register as table
+# Creates a temporary view using the DataFrame.
 wordsDataFrame.createOrReplaceTempView("words")
 
 # Do word count on table using SQL and print it

examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala

Lines changed: 3 additions & 3 deletions
@@ -35,8 +35,8 @@ object RDDRelation {
 import spark.implicits._
 
 val df = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i")))
-// Any RDD containing case classes can be registered as a table. The schema of the table is
-// automatically inferred using scala reflection.
+// Any RDD containing case classes can be used to create a temporary view. The schema of the
+// view is automatically inferred using scala reflection.
 df.createOrReplaceTempView("records")
 
 // Once tables have been registered, you can run SQL queries over them.
@@ -66,7 +66,7 @@ object RDDRelation {
 // Queries can be run using the DSL on parquet files just like the original RDD.
 parquetFile.where($"key" === 1).select($"value".as("a")).collect().foreach(println)
 
-// These files can also be registered as tables.
+// These files can also be used to create a temporary view.
 parquetFile.createOrReplaceTempView("parquetFile")
 spark.sql("SELECT * FROM parquetFile").collect().foreach(println)
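
Both changed sites in this example, condensed into one Scala sketch; `spark` and the `Record(key: Int, value: String)` case class are assumed from the surrounding file, and the Parquet path is illustrative rather than the example's actual output location:

import spark.implicits._

val df = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i")))
// The view's schema is inferred from the Record case class via Scala reflection.
df.createOrReplaceTempView("records")
spark.sql("SELECT * FROM records").collect().foreach(println)

// A Parquet-backed DataFrame can create a view the same way.
df.write.parquet("records.parquet")
val parquetFile = spark.read.parquet("records.parquet")
parquetFile.createOrReplaceTempView("parquetFile")
spark.sql("SELECT * FROM parquetFile").collect().foreach(println)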

examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala

Lines changed: 2 additions & 2 deletions
@@ -70,9 +70,9 @@ object HiveFromSpark {
   case Row(key: Int, value: String) => s"Key: $key, Value: $value"
 }
 
-// You can also register RDDs as temporary tables within a HiveContext.
+// You can also use RDDs to create temporary views within a HiveContext.
 val rdd = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i")))
-rdd.toDF().registerTempTable("records")
+rdd.toDF().createOrReplaceTempView("records")
 
 // Queries can then join RDD data with data stored in Hive.
 println("Result of SELECT *:")

examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ object SqlNetworkWordCount {
 // Convert RDD[String] to RDD[case class] to DataFrame
 val wordsDataFrame = rdd.map(w => Record(w)).toDF()
 
-// Register as table
+// Creates a temporary view using the DataFrame
 wordsDataFrame.createOrReplaceTempView("words")
 
 // Do word count on table using SQL and print it

mllib/src/test/java/org/apache/spark/ml/JavaPipelineSuite.java

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ public void pipeline() {
 Pipeline pipeline = new Pipeline()
   .setStages(new PipelineStage[]{scaler, lr});
 PipelineModel model = pipeline.fit(dataset);
-model.transform(dataset).registerTempTable("prediction");
+model.transform(dataset).createOrReplaceTempView("prediction");
 Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction");
 predictions.collectAsList();
 }
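
The test's flow in a short Scala sketch; `scaler`, `lr`, `dataset`, and `spark` are stand-ins for the suite's fixtures and are assumed to be in scope:

import org.apache.spark.ml.Pipeline

// Fit the two-stage pipeline, then expose its output to SQL via a temporary view.
val pipeline = new Pipeline().setStages(Array(scaler, lr))
val model = pipeline.fit(dataset)
model.transform(dataset).createOrReplaceTempView("prediction")
val predictions = spark.sql("SELECT label, probability, prediction FROM prediction")
predictions.collect()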
