Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
SPARK-1438 fixing more space formatting issues
  • Loading branch information
arun-rama committed Apr 24, 2014
commit 07bb06e9f15406567c4907d964a959728b8bf77b
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ class PartitionwiseSampledRDDPartition(val prev: Partition, val seed: Long)
*
* @param prev RDD to be sampled
* @param sampler a random sampler
- * @param seed random seed, default to a Long value generated by an instance of
- * java.util.Random shared within the library code
+ * @param seed random seed
* @tparam T input RDD item type
* @tparam U sampled RDD item type
*/
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/rdd/RDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ abstract class RDD[T: ClassTag](
* Randomly splits this RDD with the provided weights.
*
* @param weights weights for splits, will be normalized if they don't sum to 1
- * @param seed random seed, default to rand.nextLong
+ * @param seed random seed
*
* @return split RDDs in an array
*/
Expand Down
2 changes: 1 addition & 1 deletion core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ class RDDSuite extends FunSuite with SharedSparkContext {
test("takeSample") {
val data = sc.parallelize(1 to 100, 2)

- for (num <- List(5,20,100)) {
+ for (num <- List(5, 20, 100)) {
val sample = data.takeSample(withReplacement=false, num=num)
assert(sample.size === num) // Got exactly num elements
assert(sample.toSet.size === num) // Elements are distinct
Expand Down
4 changes: 2 additions & 2 deletions python/pyspark/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,9 +384,9 @@ def takeSample(self, withReplacement, num, seed=None):
# See: scala/spark/RDD.scala
rand = Random(seed)
while len(samples) < total:
- samples = self.sample(withReplacement, fraction, rand.randint(0,sys.maxint)).collect()
+ samples = self.sample(withReplacement, fraction, rand.randint(0, sys.maxint)).collect()

- sampler = RDDSampler(withReplacement, fraction, rand.randint(0,sys.maxint))
+ sampler = RDDSampler(withReplacement, fraction, rand.randint(0, sys.maxint))
sampler.shuffle(samples)
return samples[0:total]

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/rddsampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(self, withReplacement, fraction, seed=None):
print >> sys.stderr, "NumPy does not appear to be installed. Falling back to default random generator for sampling."
self._use_numpy = False

- self._seed = seed if seed is not None else random.randint(0,sys.maxint)
+ self._seed = seed if seed is not None else random.randint(0, sys.maxint)
self._withReplacement = withReplacement
self._fraction = fraction
self._random = None
Expand Down