Commit 3386d5d

Merge pull request high-performance-spark#86 from holdenk/fix-more-style

Fix more style

2 parents 6cc6475 + cd69b2e

6 files changed: +26 -19 lines changed

high_performance_pyspark/simple_perf.py

Lines changed: 10 additions & 7 deletions

@@ -40,16 +40,19 @@ def generate_scale_data(sqlCtx, rows, numCols):
     scalasc = jsc.sc()
     gateway = sc._gateway
     # Call a java method that gives us back an RDD of JVM Rows (Int, Double)
-    # While Python RDDs are wrapped Java RDDs (even of Rows) the contents are different, so we
-    # can't directly wrap this.
+    # While Python RDDs are wrapped Java RDDs (even of Rows) the contents are
+    # different, so we can't directly wrap this.
     # This returns a Java RDD of Rows - normally it would better to
-    # return a DataFrame directly, but for illustration we will work with an RDD
-    # of Rows.
-    java_rdd = gateway.jvm.com.highperformancespark.examples.tools.GenerateScalingData. \
-      generateMiniScaleRows(scalasc, rows, numCols)
+    # return a DataFrame directly, but for illustration we will work
+    # with an RDD of Rows.
+    java_rdd = (gateway.jvm.com.highperformancespark.examples.
+                tools.GenerateScalingData.
+                generateMiniScaleRows(scalasc, rows, numCols))
     # Schemas are serialized to JSON and sent back and forth
     # Construct a Python Schema and turn it into a Java Schema
-    schema = StructType([StructField("zip", IntegerType()), StructField("fuzzyness", DoubleType())])
+    schema = StructType([
+        StructField("zip", IntegerType()),
+        StructField("fuzzyness", DoubleType())])
     # 2.1 / pre-2.1
     try:
         jschema = javaSqlCtx.parseDataType(schema.json())
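The hunk above reflows a Py4J interop pattern: Python reaches JVM classes through the gateway hanging off the SparkContext, and schemas cross the Python/JVM boundary as JSON strings rather than as wrapped objects. A minimal sketch of that pattern, assuming an existing SparkContext `sc` and a pre-2.1-era SQLContext `sqlCtx` as in the try block above (`_gateway` and `_ssql_ctx` are private PySpark attributes):

from pyspark.sql.types import (
    DoubleType, IntegerType, StructField, StructType)

def java_schema_from_python(sqlCtx, sc):
    # Reach into the JVM through the Py4J gateway on the SparkContext.
    gateway = sc._gateway
    # Build the schema on the Python side...
    schema = StructType([
        StructField("zip", IntegerType()),
        StructField("fuzzyness", DoubleType())])
    # ...then serialize it to JSON and have the Java SQLContext parse it,
    # since schemas are sent back and forth as JSON.
    javaSqlCtx = sqlCtx._ssql_ctx
    return javaSqlCtx.parseDataType(schema.json())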

src/main/java/com/highperformancespark/examples/JavaInterop.java

Lines changed: 7 additions & 5 deletions

@@ -16,7 +16,8 @@
 public class JavaInterop {

   //tag::realClassTag[]
-  public static JavaPairRDD wrapPairRDD(RDD<Tuple2<String, Object>> rdd) {
+  public static JavaPairRDD wrapPairRDD(
+      RDD<Tuple2<String, Object>> rdd) {
     // Construct the class tags
     ClassTag<String> strCt = ClassTag$.MODULE$.apply(String.class);
     ClassTag<Long> longCt = ClassTag$.MODULE$.apply(scala.Long.class);
@@ -25,10 +26,11 @@ public static JavaPairRDD wrapPairRDD(RDD<Tuple2<String, Object>> rdd) {
   //end::realClassTag[]

   //tag::fakeClassTag[]
-  public static JavaPairRDD wrapPairRDDFakeCt(RDD<Tuple2<String, Object>> rdd) {
-    // Construct the class tags by casting AnyRef - this would be more commonly done with
-    // generic or templated code where we can't explicitly construct the correct class tag
-    // as using fake class tags may result in degraded performance.
+  public static JavaPairRDD wrapPairRDDFakeCt(
+      RDD<Tuple2<String, Object>> rdd) {
+    // Construct the class tags by casting AnyRef - this would be more commonly done
+    // with generic or templated code where we can't explicitly construct the correct
+    // class tag as using fake class tags may result in degraded performance.
     ClassTag<Object> fake = ClassTag$.MODULE$.AnyRef();
     return new JavaPairRDD(rdd, fake, fake);
   }

src/main/java/com/highperformancespark/examples/WordCount.java

Lines changed: 3 additions & 2 deletions

@@ -16,9 +16,10 @@ public final class WordCount {
   public static void main(String[] args) throws Exception {
     JavaSparkContext jsc = new JavaSparkContext();
     JavaRDD<String> lines = jsc.textFile(args[0]);
-    JavaRDD<String> words = lines.flatMap(e -> Arrays.asList(pattern.split(e)).iterator());
+    JavaRDD<String> words = lines.flatMap(e -> Arrays.asList(
+        pattern.split(e)).iterator());
     JavaPairRDD<String, Integer> wordsIntial = words.mapToPair(
-      e -> new Tuple2<String, Integer>(e, 1));
+        e -> new Tuple2<String, Integer>(e, 1));
   }
 }
 //end::wordCount[]
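For reference, the same word-count steps in PySpark - a sketch assuming a SparkContext `sc` and a hypothetical input path; the Java snippet above stops at the initial (word, 1) pairs, and a reduceByKey would complete the count:

import re

pattern = re.compile(" ")
lines = sc.textFile("input.txt")  # hypothetical input path
# Split each line into words (the flatMap step above).
words = lines.flatMap(lambda line: pattern.split(line))
# Pair each word with an initial count of 1 (the mapToPair step above).
words_initial = words.map(lambda word: (word, 1))
# Sum the counts per word - this step is not in the Java snippet.
counts = words_initial.reduceByKey(lambda a, b: a + b)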

src/main/java/com/highperformancespark/examples/dataframe/JavaUDFs.java

Lines changed: 3 additions & 1 deletion

@@ -10,7 +10,9 @@ public class JavaUDFs {

   public static void setupUDFs(SQLContext sqlContext) {
     //tag::basicUDF[]
-    sqlContext.udf().register("strlen", (String s) -> s.length(), DataTypes.StringType);
+    sqlContext.udf()
+      .register("strlen",
+                (String s) -> s.length(), DataTypes.StringType);
     //end::basicUDF[]
   }
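The same UDF registered from Python - a sketch assuming a Spark 1.x-era SQLContext named `sqlContext`; note that the declared return type should match what the function actually returns (an integer for a length):

from pyspark.sql.types import IntegerType

# registerFunction is the Spark 1.x API for registering Python UDFs.
sqlContext.registerFunction("strlen", lambda s: len(s), IntegerType())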

src/main/scala/com/high-performance-spark-examples/goldilocks/GoldilocksFirstTry.scala

Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@ import scala.collection.{Map, mutable}
 object GoldilocksGroupByKey {
   //tag::groupByKey[]
   def findRankStatistics(
-    dataFrame: DataFrame ,
+    dataFrame: DataFrame,
     ranks: List[Long]): Map[Int, Iterable[Double]] = {
     require(ranks.forall(_ > 0))
     //Map to column index, value pairs
@@ -199,7 +199,7 @@ object GoldilocksFirstTry {
   def aggregateColumnFrequencies (partitionIndex : Int,
     valueColumnPairs : Iterator[(Double, Int)]) = {
     val columnsFreq : Array[Long] = valueColumnPairs.aggregate(zero)(
-      (a : Array[Long], v : (Double ,Int)) => {
+      (a : Array[Long], v : (Double, Int)) => {
         val (value, colIndex) = v
         //increment the cell in the zero array corresponding to this column index
         a(colIndex) = a(colIndex) + 1L
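The seqOp touched up here fills a per-column count array, and PySpark's RDD.aggregate follows the same zero/seqOp/combOp shape. A sketch assuming an RDD of (value, columnIndex) pairs and a hypothetical num_cols:

def count_column_frequencies(value_column_pairs, num_cols):
    # Zero value: one counter per column.
    zero = [0] * num_cols

    def seq_op(acc, pair):
        value, col_index = pair
        # Increment the cell corresponding to this pair's column index.
        acc[col_index] += 1
        return acc

    def comb_op(left, right):
        # Merge per-partition count arrays elementwise.
        return [a + b for a, b in zip(left, right)]

    return value_column_pairs.aggregate(zero, seq_op, comb_op)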

src/main/scala/com/high-performance-spark-examples/transformations/NarrowAndWide.scala

Lines changed: 1 addition & 2 deletions

@@ -1,4 +1,3 @@
-
 package com.highperformancespark.examples.transformations

 import org.apache.spark.rdd.RDD
@@ -26,7 +25,7 @@ object NarrowAndWide {
   def simpleSparkProgram(rdd : RDD[Double]): Long ={
     //stage1
     rdd.filter(_< 1000.0)
-      .map(x => (x , x) )
+      .map(x => (x, x) )
     //stage2
       .groupByKey()
       .map{ case(value, groups) => (groups.sum, value)}
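The method above mixes narrow and wide transformations: filter and map stay within stage 1, while groupByKey forces a shuffle that begins stage 2. The same pipeline in PySpark - a sketch assuming an RDD of doubles named rdd, with a count() added at the end since the Scala method returns a Long from code not shown in this hunk:

result = (rdd
    .filter(lambda x: x < 1000.0)          # stage 1: narrow
    .map(lambda x: (x, x))                 # stage 1: narrow
    .groupByKey()                          # shuffle - stage boundary
    .map(lambda kv: (sum(kv[1]), kv[0]))   # stage 2
    .count())                              # assumed final action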
