coltypes

apache · olarayej · Sep 24, 2015 · Sep 24, 2015 · Sep 24, 2015 · Sep 24, 2015
commit a68f97a557df528b929595ef6487cd515a232d2f
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
@@ -23,9 +23,11 @@ export("setJobGroup",
 exportClasses("DataFrame")
 
 exportMethods("arrange",
-              "attach",
+              "as.data.frame",
+              "attach", 
               "cache",
               "collect",
+              "coltypes",
               "columns",
               "count",
               "cov",

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
@@ -2102,6 +2102,7 @@ setMethod("as.data.frame",
               stop(paste("Unused argument(s): ", paste(list(...), collapse=", ")))
             }
             collect(x)
+
           })
 
 #' The specified DataFrame is attached to the R search path. This means that
@@ -2152,3 +2153,31 @@ setMethod("with",
             newEnv <- assignNewEnv(data)
             eval(substitute(expr), envir = newEnv, enclos = newEnv)
           })
+
+#' Returns the column types of a DataFrame, expressed as R data types.
+#'
+#' @name coltypes
+#' @title Get column types of a DataFrame
+#' @param x (DataFrame)
+#' @return value (character) A character vector with the R type of each column of the
+#'         given DataFrame; a Spark type missing from the lookup table yields NA
+#' @rdname coltypes
+setMethod("coltypes",
+          signature(x = "DataFrame"),
+          function(x) {
+            # TODO: This may be moved as a global parameter
+            # Supported Spark data types and the R data types they map to
+            DATA_TYPES <- c("string" = "character",
+                            "double" = "numeric",
+                            "int" = "integer",
+                            "long" = "integer",
+                            "boolean" = "logical")
+
+            # dtypes() is read here as a list with one (name, type) entry per
+            # column: as.data.frame() turns each entry into a column, so row 2
+            # carries the Spark type names.
+            types <- as.character(t(as.data.frame(dtypes(x))[2, ]))
+
+            # Look up the R type for each Spark type; unknown types give NA
+            as.character(DATA_TYPES[types])
+          })
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
@@ -1027,7 +1027,6 @@ setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") })
 #' @export
 setGeneric("year", function(x) { standardGeneric("year") })
 
-
 #' @rdname glm
 #' @export
 setGeneric("glm")
@@ -1047,3 +1046,7 @@ setGeneric("attach")
 #' @rdname with
 #' @export
 setGeneric("with")
+
+#' @rdname coltypes
+#' @export
+setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1460,13 +1460,15 @@ test_that("SQL error message is returned from JVM", {
   expect_equal(grepl("Table not found: blah", retError), TRUE)
 })
 
+irisDF <- createDataFrame(sqlContext, iris)
+
 test_that("Method as.data.frame as a synonym for collect()", {
-  irisDF <- createDataFrame(sqlContext, iris)
   expect_equal(as.data.frame(irisDF), collect(irisDF))
   irisDF2 <- irisDF[irisDF$Species == "setosa", ]
   expect_equal(as.data.frame(irisDF2), collect(irisDF2))
 })
 
+
 test_that("attach() on a DataFrame", {
   df <- jsonFile(sqlContext, jsonPath)
   expect_error(age)
@@ -1496,6 +1498,10 @@ test_that("with() on a DataFrame", {
   expect_equal(nrow(sum2), 35)
 })
 
+test_that("Method coltypes() to get R's data types of a DataFrame", {
+  expect_equal(coltypes(irisDF), rep(c("numeric", "character"), times = c(4, 1)))
+})
+
 unlink(parquetPath)
 unlink(jsonPath)
-unlink(jsonPathNa)
+unlink(jsonPathNa)