@@ -70,9 +70,9 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
                           localDir = NULL, overwrite = FALSE) {
   version <- paste0("spark-", packageVersion("SparkR"))
   hadoopVersion <- tolower(hadoopVersion)
-  hadoopVersionName <- hadoop_version_name(hadoopVersion)
+  hadoopVersionName <- hadoopVersionName(hadoopVersion)
   packageName <- paste(version, "bin", hadoopVersionName, sep = "-")
-  localDir <- ifelse(is.null(localDir), spark_cache_path(),
+  localDir <- ifelse(is.null(localDir), sparkCachePath(),
                      normalizePath(localDir, mustWork = FALSE))
 
   if (is.na(file.info(localDir)$isdir)) {
@@ -88,12 +88,14 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
 
   # can use dir.exists(packageLocalDir) under R 3.2.0 or later
   if (!is.na(file.info(packageLocalDir)$isdir) && !overwrite) {
-    fmt <- "Spark %s for Hadoop %s is found, and SPARK_HOME set to %s"
+    fmt <- "%s for Hadoop %s found, with SPARK_HOME set to %s"
     msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion),
                    packageLocalDir)
     message(msg)
     Sys.setenv(SPARK_HOME = packageLocalDir)
     return(invisible(packageLocalDir))
+  } else {
+    message("Spark not found in the cache directory. Installation will start.")
   }
 
   packageLocalPath <- paste0(packageLocalDir, ".tgz")
@@ -102,7 +104,7 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
   if (tarExists && !overwrite) {
     message("tar file found.")
   } else {
-    robust_download_tar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath)
+    robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath)
   }
 
   message(sprintf("Installing to %s", localDir))
@@ -116,33 +118,37 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
   invisible(packageLocalDir)
 }
 
-robust_download_tar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) {
+robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) {
   # step 1: use user-provided url
   if (!is.null(mirrorUrl)) {
     msg <- sprintf("Use user-provided mirror site: %s.", mirrorUrl)
     message(msg)
-    success <- direct_download_tar(mirrorUrl, version, hadoopVersion,
+    success <- directDownloadTar(mirrorUrl, version, hadoopVersion,
                                  packageName, packageLocalPath)
-    if (success) return()
+    if (success) {
+      return()
+    } else {
+      message(paste0("Unable to download from mirrorUrl: ", mirrorUrl))
+    }
   } else {
-    message("Mirror site not provided.")
+    message("MirrorUrl not provided.")
   }
 
   # step 2: use url suggested from apache website
-  message("Looking for site suggested from apache website...")
-  mirrorUrl <- get_preferred_mirror(version, packageName)
+  message("Looking for preferred site from apache website...")
+  mirrorUrl <- getPreferredMirror(version, packageName)
   if (!is.null(mirrorUrl)) {
-    success <- direct_download_tar(mirrorUrl, version, hadoopVersion,
+    success <- directDownloadTar(mirrorUrl, version, hadoopVersion,
                                  packageName, packageLocalPath)
     if (success) return()
   } else {
-    message("Unable to find suggested mirror site.")
+    message("Unable to find preferred mirror site.")
   }
 
   # step 3: use backup option
   message("To use backup site...")
-  mirrorUrl <- default_mirror_url()
-  success <- direct_download_tar(mirrorUrl, version, hadoopVersion,
+  mirrorUrl <- defaultMirrorUrl()
+  success <- directDownloadTar(mirrorUrl, version, hadoopVersion,
                                packageName, packageLocalPath)
   if (success) {
     return(packageLocalPath)
@@ -155,7 +161,7 @@ robust_download_tar <- function(mirrorUrl, version, hadoopVersion, packageName,
   }
 }
 
-get_preferred_mirror <- function(version, packageName) {
+getPreferredMirror <- function(version, packageName) {
   jsonUrl <- paste0("http://www.apache.org/dyn/closer.cgi?path=",
                     file.path("spark", version, packageName),
                     ".tgz&as_json=1")
@@ -175,10 +181,10 @@ get_preferred_mirror <- function(version, packageName) {
   mirrorPreferred
 }
 
-direct_download_tar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) {
+directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) {
   packageRemotePath <- paste0(
     file.path(mirrorUrl, version, packageName), ".tgz")
-  fmt <- paste("Downloading Spark %s for Hadoop %s from:\n- %s")
+  fmt <- "Downloading %s for Hadoop %s from:\n- %s"
   msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion),
                  packageRemotePath)
   message(msg)
@@ -192,11 +198,11 @@ direct_download_tar <- function(mirrorUrl, version, hadoopVersion, packageName,
   !isFail
 }
 
-default_mirror_url <- function() {
+defaultMirrorUrl <- function() {
   "http://www-us.apache.org/dist/spark"
 }
 
-hadoop_version_name <- function(hadoopVersion) {
+hadoopVersionName <- function(hadoopVersion) {
   if (hadoopVersion == "without") {
     "without-hadoop"
   } else if (grepl("^[0-9]+\\.[0-9]+$", hadoopVersion, perl = TRUE)) {
@@ -208,7 +214,7 @@ hadoop_version_name <- function(hadoopVersion) {
 
 # The implementation refers to appdirs package: https://pypi.python.org/pypi/appdirs and
 # adapt to Spark context
-spark_cache_path <- function() {
+sparkCachePath <- function() {
   if (.Platform$OS.type == "windows") {
     winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
     if (is.na(winAppPath)) {
@@ -231,3 +237,21 @@ spark_cache_path <- function() {
   }
   normalizePath(path, mustWork = FALSE)
 }
+
+
+installInstruction <- function(mode) {
+  if (mode == "remote") {
+    paste0("Connecting to a remote Spark master. ",
+           "Please make sure Spark package is also installed in this machine.\n",
+           "- If there is one, set the path in sparkHome parameter or ",
+           "environment variable SPARK_HOME.\n",
+           "- If not, you may run install.spark function to do the job. ",
+           "Please make sure the Spark and the Hadoop versions ",
+           "match the versions on the cluster. ",
+           "SparkR package is compatible with Spark ", packageVersion("SparkR"), ".",
+           "If you need further help, ",
+           "contact the administrators of the cluster.")
+  } else {
+    stop(paste0("No instruction found for ", mode, " mode."))
+  }
+}
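
Usage sketch (not part of the patch): after these renames the public entry point is still install.spark, with the camelCase helpers used internally. The mirror URL below is just the value returned by defaultMirrorUrl(); the localDir path is illustrative.

library(SparkR)

# Install the Spark distribution matching packageVersion("SparkR"), built
# for Hadoop 2.7, into the default cache directory (sparkCachePath());
# a previously installed copy is reused unless overwrite = TRUE.
install.spark(hadoopVersion = "2.7")

# Pin a specific Apache mirror and a custom install directory; the tarball
# is fetched from file.path(mirrorUrl, version, packageName).
install.spark(mirrorUrl = "http://www-us.apache.org/dist/spark",
              localDir = "~/spark")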