|
| 1 | +% Generated by roxygen2 (4.1.0): do not edit by hand |
| 2 | +% Please edit documentation in R/generics.R, R/pairRDD.R |
| 3 | +\docType{methods} |
| 4 | +\name{sampleByKey} |
| 5 | +\alias{sampleByKey} |
| 6 | +\alias{sampleByKey,RDD,logical,vector,integer-method} |
| 7 | +\alias{sampleByKey,RDD-method} |
| 8 | +\title{Return a subset of this RDD sampled by key.} |
| 9 | +\usage{ |
| 10 | +sampleByKey(x, withReplacement, fractions, seed) |
| 11 | + |
| 12 | +\S4method{sampleByKey}{RDD,logical,vector,integer}(x, withReplacement, |
| 13 | + fractions, seed) |
| 14 | +} |
| 15 | +\arguments{ |
| 16 | +\item{x}{The RDD to sample elements by key, where each element is |
| 17 | +list(K, V) or c(K, V).} |
| 18 | + |
| 19 | +\item{withReplacement}{Whether to sample with replacement} |
| 20 | + |
| 21 | +\item{seed}{Randomness seed value} |
| 22 | + |
| 23 | +\item{fractions}{The (rough) sample target fraction for each key, given as a key to sampling rate map} |
| 24 | +} |
| 25 | +\description{ |
| 26 | +\code{sampleByKey} creates a sample of this RDD using variable sampling rates |
| 27 | +for different keys as specified by fractions, a key to sampling rate map. |
| 28 | +} |
| 29 | +\examples{ |
| 30 | +\dontrun{ |
| 31 | +sc <- sparkR.init() |
| 32 | +rdd <- parallelize(sc, 1:3000) |
| 33 | +pairs <- lapply(rdd, function(x) { if (x \%\% 3 == 0) list("a", x) |
| 34 | + else { if (x \%\% 3 == 1) list("b", x) else list("c", x) }}) |
| 35 | +fractions <- list(a = 0.2, b = 0.1, c = 0.3) |
| 36 | +sample <- sampleByKey(pairs, FALSE, fractions, 1618L) |
| 37 | +100 < length(lookup(sample, "a")) && 300 > length(lookup(sample, "a")) # TRUE |
| 38 | +50 < length(lookup(sample, "b")) && 150 > length(lookup(sample, "b")) # TRUE |
| 39 | +200 < length(lookup(sample, "c")) && 400 > length(lookup(sample, "c")) # TRUE |
| 40 | +lookup(sample, "a")[which.min(lookup(sample, "a"))] >= 0 # TRUE |
| 41 | +lookup(sample, "a")[which.max(lookup(sample, "a"))] <= 3000 # TRUE |
| 42 | +lookup(sample, "b")[which.min(lookup(sample, "b"))] >= 0 # TRUE |
| 43 | +lookup(sample, "b")[which.max(lookup(sample, "b"))] <= 3000 # TRUE |
| 44 | +lookup(sample, "c")[which.min(lookup(sample, "c"))] >= 0 # TRUE |
| 45 | +lookup(sample, "c")[which.max(lookup(sample, "c"))] <= 3000 # TRUE |
| 46 | +fractions <- list(a = 0.2, b = 0.1, c = 0.3, d = 0.4) |
| 47 | +sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # Key "d" will be ignored |
| 48 | +fractions <- list(a = 0.2, b = 0.1) |
| 49 | +sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # Error: no sampling fraction given for key "c" |
| 50 | +} |
| 51 | +} |
| 52 | + |
0 commit comments