SPARK-1438: Fix more whitespace formatting issues

apache · arun-rama · Apr 21, 2014 · Apr 22, 2014 · Apr 23, 2014 · Apr 24, 2014
commit 07bb06e9f15406567c4907d964a959728b8bf77b
diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala
@@ -39,8 +39,7 @@ class PartitionwiseSampledRDDPartition(val prev: Partition, val seed: Long)
  *
  * @param prev RDD to be sampled
  * @param sampler a random sampler
- * @param seed random seed, default to a Long value generated by an instance of 
- * java.util.Random shared within the library code
+ * @param seed random seed
  * @tparam T input RDD item type
  * @tparam U sampled RDD item type
  */

diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -336,7 +336,7 @@ abstract class RDD[T: ClassTag](
    * Randomly splits this RDD with the provided weights.
    *
    * @param weights weights for splits, will be normalized if they don't sum to 1
-   * @param seed random seed, default to rand.nextLong
+   * @param seed random seed
    *
    * @return split RDDs in an array
    */

diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
@@ -466,7 +466,7 @@ class RDDSuite extends FunSuite with SharedSparkContext {
   test("takeSample") {
     val data = sc.parallelize(1 to 100, 2)
 
-    for (num <- List(5,20,100)) {
+    for (num <- List(5, 20, 100)) {
       val sample = data.takeSample(withReplacement=false, num=num)
       assert(sample.size === num)        // Got exactly num elements
       assert(sample.toSet.size === num)  // Elements are distinct

diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
@@ -384,9 +384,9 @@ def takeSample(self, withReplacement, num, seed=None):
         # See: scala/spark/RDD.scala
         rand = Random(seed)
         while len(samples) < total:
-            samples = self.sample(withReplacement, fraction, rand.randint(0,sys.maxint)).collect()
+            samples = self.sample(withReplacement, fraction, rand.randint(0, sys.maxint)).collect()
 
-        sampler = RDDSampler(withReplacement, fraction, rand.randint(0,sys.maxint))
+        sampler = RDDSampler(withReplacement, fraction, rand.randint(0, sys.maxint))
         sampler.shuffle(samples)
         return samples[0:total]
 

diff --git a/python/pyspark/rddsampler.py b/python/pyspark/rddsampler.py
@@ -27,7 +27,7 @@ def __init__(self, withReplacement, fraction, seed=None):
             print >> sys.stderr, "NumPy does not appear to be installed. Falling back to default random generator for sampling."
             self._use_numpy = False
 
-        self._seed = seed if seed is not None else random.randint(0,sys.maxint)
+        self._seed = seed if seed is not None else random.randint(0, sys.maxint)
         self._withReplacement = withReplacement
         self._fraction = fraction
         self._random = None