Skip to content

Commit 141efd8

Browse files
shivaram authored and Davies Liu committed
Merge pull request #245 from hqzizania/upstream
Add Rd files for sampleByKey() of [SPARKR-163] and sumRDD() of [SPARKR-92]
1 parent 9387402 commit 141efd8

File tree

3 files changed

+83
-1
lines changed

3 files changed

+83
-1
lines changed

R/pkg/R/pairRDD.R

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -821,8 +821,11 @@ setMethod("subtractByKey",
821821
function (v) { v[[1]] })
822822
})
823823

824+
#' Return a subset of this RDD sampled by key.
825+
#'
824826
#' @description
825-
#' \code{sampleByKey} return a subset RDD of the given RDD sampled by key
827+
#' \code{sampleByKey} creates a sample of this RDD using variable sampling rates
828+
#' for different keys as specified by fractions, a key-to-sampling-rate map.
826829
#'
827830
#' @param x The RDD to sample elements by key, where each element is
828831
#' list(K, V) or c(K, V).

pkg/man/sampleByKey.Rd

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
% Generated by roxygen2 (4.1.0): do not edit by hand
2+
% Please edit documentation in R/generics.R, R/pairRDD.R
3+
\docType{methods}
4+
\name{sampleByKey}
5+
\alias{sampleByKey}
6+
\alias{sampleByKey,RDD,logical,vector,integer-method}
7+
\alias{sampleByKey,RDD-method}
8+
\title{Return a subset of this RDD sampled by key.}
9+
\usage{
10+
sampleByKey(x, withReplacement, fractions, seed)
11+
12+
\S4method{sampleByKey}{RDD,logical,vector,integer}(x, withReplacement,
13+
fractions, seed)
14+
}
15+
\arguments{
16+
\item{x}{The RDD to sample elements by key, where each element is
17+
list(K, V) or c(K, V).}
18+
19+
\item{withReplacement}{Sampling with replacement or not}
20+
21+
\item{seed}{Randomness seed value}
22+
23+
\item{fractions}{The (rough) sample target fractions, given as a map from key to sampling rate}
24+
}
25+
\description{
26+
\code{sampleByKey} creates a sample of this RDD using variable sampling rates
27+
for different keys as specified by fractions, a key-to-sampling-rate map.
28+
}
29+
\examples{
30+
\dontrun{
31+
sc <- sparkR.init()
32+
rdd <- parallelize(sc, 1:3000)
33+
pairs <- lapply(rdd, function(x) { if (x \%\% 3 == 0) list("a", x)
34+
else { if (x \%\% 3 == 1) list("b", x) else list("c", x) }})
35+
fractions <- list(a = 0.2, b = 0.1, c = 0.3)
36+
sample <- sampleByKey(pairs, FALSE, fractions, 1618L)
37+
100 < length(lookup(sample, "a")) && 300 > length(lookup(sample, "a")) # TRUE
38+
50 < length(lookup(sample, "b")) && 150 > length(lookup(sample, "b")) # TRUE
39+
200 < length(lookup(sample, "c")) && 400 > length(lookup(sample, "c")) # TRUE
40+
lookup(sample, "a")[which.min(lookup(sample, "a"))] >= 0 # TRUE
41+
lookup(sample, "a")[which.max(lookup(sample, "a"))] <= 3000 # TRUE
42+
lookup(sample, "b")[which.min(lookup(sample, "b"))] >= 0 # TRUE
43+
lookup(sample, "b")[which.max(lookup(sample, "b"))] <= 3000 # TRUE
44+
lookup(sample, "c")[which.min(lookup(sample, "c"))] >= 0 # TRUE
45+
lookup(sample, "c")[which.max(lookup(sample, "c"))] <= 3000 # TRUE
46+
fractions <- list(a = 0.2, b = 0.1, c = 0.3, d = 0.4)
47+
sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # Key "d" will be ignored
48+
fractions <- list(a = 0.2, b = 0.1)
49+
sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # KeyError: "c"
50+
}
51+
}
52+

pkg/man/sumRDD.Rd

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
% Generated by roxygen2 (4.1.0): do not edit by hand
2+
% Please edit documentation in R/RDD.R, R/generics.R
3+
\docType{methods}
4+
\name{sumRDD,RDD-method}
5+
\alias{sumRDD}
6+
\alias{sumRDD,RDD}
7+
\alias{sumRDD,RDD-method}
8+
\title{Add up the elements in an RDD.}
9+
\usage{
10+
\S4method{sumRDD}{RDD}(x)
11+
12+
sumRDD(x)
13+
}
14+
\arguments{
15+
\item{x}{The RDD to add up the elements in}
16+
}
17+
\description{
18+
Add up the elements in an RDD.
19+
}
20+
\examples{
21+
\dontrun{
22+
sc <- sparkR.init()
23+
rdd <- parallelize(sc, 1:10)
24+
sumRDD(rdd) # 55
25+
}
26+
}
27+

0 commit comments

Comments
 (0)