You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@toree.apache.org by ch...@apache.org on 2017/01/23 01:06:30 UTC
[7/7] incubator-toree git commit: Remove SparkR fork (mariusvniekerk via chipsenkbeil) closes #87
Remove SparkR fork (mariusvniekerk via chipsenkbeil) closes #87
Project: http://git-wip-us.apache.org/repos/asf/incubator-toree/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-toree/commit/2eb26cd3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-toree/tree/2eb26cd3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-toree/diff/2eb26cd3
Branch: refs/heads/0.1.x
Commit: 2eb26cd3cc923c238e03c01276bba94452d645df
Parents: acc0285
Author: mariusvniekerk <ma...@gmail.com>
Authored: Sun Jan 22 19:04:31 2017 -0600
Committer: Chip Senkbeil <ch...@gmail.com>
Committed: Sun Jan 22 19:04:31 2017 -0600
----------------------------------------------------------------------
etc/examples/notebooks/magic-tutorial.ipynb | 8 +-
.../notebooks/meetup-streaming-toree.ipynb | 12 +-
.../src/main/resources/R/.gitignore | 6 -
.../src/main/resources/R/DOCUMENTATION.md | 12 -
.../src/main/resources/R/README.md | 67 -
.../src/main/resources/R/WINDOWS.md | 13 -
.../src/main/resources/R/create-docs.sh | 46 -
.../src/main/resources/R/install-dev.bat | 27 -
.../src/main/resources/R/install-dev.sh | 45 -
.../src/main/resources/R/log4j.properties | 28 -
.../src/main/resources/R/pkg/.lintr | 2 -
.../src/main/resources/R/pkg/DESCRIPTION | 36 -
.../src/main/resources/R/pkg/NAMESPACE | 259 ---
.../src/main/resources/R/pkg/R/DataFrame.R | 1819 ----------------
.../src/main/resources/R/pkg/R/RDD.R | 1644 ---------------
.../src/main/resources/R/pkg/R/SQLContext.R | 513 -----
.../src/main/resources/R/pkg/R/backend.R | 117 --
.../src/main/resources/R/pkg/R/broadcast.R | 86 -
.../src/main/resources/R/pkg/R/client.R | 69 -
.../src/main/resources/R/pkg/R/column.R | 234 ---
.../src/main/resources/R/pkg/R/context.R | 225 --
.../src/main/resources/R/pkg/R/deserialize.R | 189 --
.../src/main/resources/R/pkg/R/functions.R | 1988 ------------------
.../src/main/resources/R/pkg/R/generics.R | 985 ---------
.../src/main/resources/R/pkg/R/group.R | 138 --
.../src/main/resources/R/pkg/R/jobj.R | 104 -
.../src/main/resources/R/pkg/R/mllib.R | 99 -
.../src/main/resources/R/pkg/R/pairRDD.R | 908 --------
.../src/main/resources/R/pkg/R/schema.R | 166 --
.../src/main/resources/R/pkg/R/serialize.R | 208 --
.../src/main/resources/R/pkg/R/sparkR.R | 360 ----
.../src/main/resources/R/pkg/R/utils.R | 600 ------
.../main/resources/R/pkg/inst/profile/general.R | 22 -
.../main/resources/R/pkg/inst/profile/shell.R | 47 -
.../inst/test_support/sparktestjar_2.10-1.0.jar | Bin 2886 -> 0 bytes
.../main/resources/R/pkg/inst/tests/jarTest.R | 32 -
.../R/pkg/inst/tests/packageInAJarTest.R | 30 -
.../R/pkg/inst/tests/test_binaryFile.R | 89 -
.../R/pkg/inst/tests/test_binary_function.R | 101 -
.../resources/R/pkg/inst/tests/test_broadcast.R | 48 -
.../resources/R/pkg/inst/tests/test_client.R | 36 -
.../resources/R/pkg/inst/tests/test_context.R | 57 -
.../R/pkg/inst/tests/test_includeJAR.R | 37 -
.../R/pkg/inst/tests/test_includePackage.R | 57 -
.../resources/R/pkg/inst/tests/test_mllib.R | 61 -
.../R/pkg/inst/tests/test_parallelize_collect.R | 109 -
.../main/resources/R/pkg/inst/tests/test_rdd.R | 793 -------
.../resources/R/pkg/inst/tests/test_shuffle.R | 221 --
.../resources/R/pkg/inst/tests/test_sparkSQL.R | 1244 -----------
.../main/resources/R/pkg/inst/tests/test_take.R | 66 -
.../resources/R/pkg/inst/tests/test_textFile.R | 161 --
.../resources/R/pkg/inst/tests/test_utils.R | 140 --
.../main/resources/R/pkg/inst/worker/daemon.R | 52 -
.../main/resources/R/pkg/inst/worker/worker.R | 177 --
.../main/resources/R/pkg/src-native/Makefile | 27 -
.../resources/R/pkg/src-native/Makefile.win | 27 -
.../R/pkg/src-native/string_hash_code.c | 49 -
.../src/main/resources/R/pkg/tests/run-all.R | 21 -
.../src/main/resources/R/run-tests.sh | 39 -
sparkr-interpreter/src/main/resources/README.md | 24 +-
.../src/main/resources/kernelR/sparkr_runner.R | 64 +-
.../src/main/resources/package-sparkR.sh | 20 -
.../src/main/resources/sparkr_bundle.tar.gz | Bin 455638 -> 0 bytes
.../interpreter/sparkr/ReflectiveRBackend.scala | 4 +-
.../interpreter/sparkr/SparkRProcess.scala | 2 +-
.../interpreter/sparkr/SparkRService.scala | 3 +-
66 files changed, 70 insertions(+), 14803 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/etc/examples/notebooks/magic-tutorial.ipynb
----------------------------------------------------------------------
diff --git a/etc/examples/notebooks/magic-tutorial.ipynb b/etc/examples/notebooks/magic-tutorial.ipynb
index e6d6a62..d128ea5 100644
--- a/etc/examples/notebooks/magic-tutorial.ipynb
+++ b/etc/examples/notebooks/magic-tutorial.ipynb
@@ -724,11 +724,7 @@
},
"outputs": [
{
- "data": {
- "application/javascript": [
- "alert(\"Hello, Magics!\")"
- ]
- },
+ "data": {},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
@@ -950,4 +946,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/etc/examples/notebooks/meetup-streaming-toree.ipynb
----------------------------------------------------------------------
diff --git a/etc/examples/notebooks/meetup-streaming-toree.ipynb b/etc/examples/notebooks/meetup-streaming-toree.ipynb
index eec9072..5ce6c07 100644
--- a/etc/examples/notebooks/meetup-streaming-toree.ipynb
+++ b/etc/examples/notebooks/meetup-streaming-toree.ipynb
@@ -720,7 +720,9 @@
"collapsed": true
},
"outputs": [],
- "source": []
+ "source": [
+ ""
+ ]
}
],
"metadata": {
@@ -734,13 +736,13 @@
},
"urth": {
"dashboard": {
- "cellMargin": 10,
- "defaultCellHeight": 20,
+ "cellMargin": 10.0,
+ "defaultCellHeight": 20.0,
"layoutStrategy": "packed",
- "maxColumns": 12
+ "maxColumns": 12.0
}
}
},
"nbformat": 4,
"nbformat_minor": 0
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/.gitignore
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/.gitignore b/sparkr-interpreter/src/main/resources/R/.gitignore
deleted file mode 100644
index 9a5889b..0000000
--- a/sparkr-interpreter/src/main/resources/R/.gitignore
+++ /dev/null
@@ -1,6 +0,0 @@
-*.o
-*.so
-*.Rd
-lib
-pkg/man
-pkg/html
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/DOCUMENTATION.md
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/DOCUMENTATION.md b/sparkr-interpreter/src/main/resources/R/DOCUMENTATION.md
deleted file mode 100644
index 931d015..0000000
--- a/sparkr-interpreter/src/main/resources/R/DOCUMENTATION.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# SparkR Documentation
-
-SparkR documentation is generated using in-source comments annotated using using
-`roxygen2`. After making changes to the documentation, to generate man pages,
-you can run the following from an R console in the SparkR home directory
-
- library(devtools)
- devtools::document(pkg="./pkg", roclets=c("rd"))
-
-You can verify if your changes are good by running
-
- R CMD check pkg/
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/README.md
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/README.md b/sparkr-interpreter/src/main/resources/R/README.md
deleted file mode 100644
index 005f56d..0000000
--- a/sparkr-interpreter/src/main/resources/R/README.md
+++ /dev/null
@@ -1,67 +0,0 @@
-# R on Spark
-
-SparkR is an R package that provides a light-weight frontend to use Spark from R.
-
-### SparkR development
-
-#### Build Spark
-
-Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
-```
- build/mvn -DskipTests -Psparkr package
-```
-
-#### Running sparkR
-
-You can start using SparkR by launching the SparkR shell with
-
- ./bin/sparkR
-
-The `sparkR` script automatically creates a SparkContext with Spark by default in
-local mode. To specify the Spark master of a cluster for the automatically created
-SparkContext, you can run
-
- ./bin/sparkR --master "local[2]"
-
-To set other options like driver memory, executor memory etc. you can pass in the [spark-submit](http://spark.apache.org/docs/latest/submitting-applications.html) arguments to `./bin/sparkR`
-
-#### Using SparkR from RStudio
-
-If you wish to use SparkR from RStudio or other R frontends you will need to set some environment variables which point SparkR to your Spark installation. For example
-```
-# Set this to where Spark is installed
-Sys.setenv(SPARK_HOME="/Users/shivaram/spark")
-# This line loads SparkR from the installed directory
-.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))
-library(SparkR)
-sc <- sparkR.init(master="local")
-```
-
-#### Making changes to SparkR
-
-The [instructions](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark) for making contributions to Spark also apply to SparkR.
-If you only make R file changes (i.e. no Scala changes) then you can just re-install the R package using `R/install-dev.sh` and test your changes.
-Once you have made your changes, please include unit tests for them and run existing unit tests using the `run-tests.sh` script as described below.
-
-#### Generating documentation
-
-The SparkR documentation (Rd files and HTML files) are not a part of the source repository. To generate them you can run the script `R/create-docs.sh`. This script uses `devtools` and `knitr` to generate the docs and these packages need to be installed on the machine before using the script.
-
-### Examples, Unit tests
-
-SparkR comes with several sample programs in the `examples/src/main/r` directory.
-To run one of them, use `./bin/sparkR <filename> <args>`. For example:
-
- ./bin/sparkR examples/src/main/r/dataframe.R
-
-You can also run the unit-tests for SparkR by running (you need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first):
-
- R -e 'install.packages("testthat", repos="http://cran.us.r-project.org")'
- ./R/run-tests.sh
-
-### Running on YARN
-The `./bin/spark-submit` and `./bin/sparkR` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so. For example on CDH you can run
-```
-export YARN_CONF_DIR=/etc/hadoop/conf
-./bin/spark-submit --master yarn examples/src/main/r/dataframe.R
-```
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/WINDOWS.md
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/WINDOWS.md b/sparkr-interpreter/src/main/resources/R/WINDOWS.md
deleted file mode 100644
index 3f889c0..0000000
--- a/sparkr-interpreter/src/main/resources/R/WINDOWS.md
+++ /dev/null
@@ -1,13 +0,0 @@
-## Building SparkR on Windows
-
-To build SparkR on Windows, the following steps are required
-
-1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
-include Rtools and R in `PATH`.
-2. Install
-[JDK7](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html) and set
-`JAVA_HOME` in the system environment variables.
-3. Download and install [Maven](http://maven.apache.org/download.html). Also include the `bin`
-directory in Maven in `PATH`.
-4. Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html).
-5. Open a command shell (`cmd`) in the Spark directory and run `mvn -DskipTests -Psparkr package`
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/create-docs.sh
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/create-docs.sh b/sparkr-interpreter/src/main/resources/R/create-docs.sh
deleted file mode 100755
index d2ae160..0000000
--- a/sparkr-interpreter/src/main/resources/R/create-docs.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Script to create API docs for SparkR
-# This requires `devtools` and `knitr` to be installed on the machine.
-
-# After running this script the html docs can be found in
-# $SPARK_HOME/R/pkg/html
-
-set -o pipefail
-set -e
-
-# Figure out where the script is
-export FWDIR="$(cd "`dirname "$0"`"; pwd)"
-pushd $FWDIR
-
-# Install the package (this will also generate the Rd files)
-./install-dev.sh
-
-# Now create HTML files
-
-# knit_rd puts html in current working directory
-mkdir -p pkg/html
-pushd pkg/html
-
-Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
-
-popd
-
-popd
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/install-dev.bat
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/install-dev.bat b/sparkr-interpreter/src/main/resources/R/install-dev.bat
deleted file mode 100644
index 008a5c6..0000000
--- a/sparkr-interpreter/src/main/resources/R/install-dev.bat
+++ /dev/null
@@ -1,27 +0,0 @@
-@echo off
-
-rem
-rem Licensed to the Apache Software Foundation (ASF) under one or more
-rem contributor license agreements. See the NOTICE file distributed with
-rem this work for additional information regarding copyright ownership.
-rem The ASF licenses this file to You under the Apache License, Version 2.0
-rem (the "License"); you may not use this file except in compliance with
-rem the License. You may obtain a copy of the License at
-rem
-rem http://www.apache.org/licenses/LICENSE-2.0
-rem
-rem Unless required by applicable law or agreed to in writing, software
-rem distributed under the License is distributed on an "AS IS" BASIS,
-rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-rem See the License for the specific language governing permissions and
-rem limitations under the License.
-rem
-
-rem Install development version of SparkR
-rem
-
-set SPARK_HOME=%~dp0..
-
-MKDIR %SPARK_HOME%\R\lib
-
-R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/install-dev.sh
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/install-dev.sh b/sparkr-interpreter/src/main/resources/R/install-dev.sh
deleted file mode 100755
index 59d98c9..0000000
--- a/sparkr-interpreter/src/main/resources/R/install-dev.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# This scripts packages the SparkR source files (R and C files) and
-# creates a package that can be loaded in R. The package is by default installed to
-# $FWDIR/lib and the package can be loaded by using the following command in R:
-#
-# library(SparkR, lib.loc="$FWDIR/lib")
-#
-# NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory
-# to load the SparkR package on the worker nodes.
-
-set -o pipefail
-set -e
-
-FWDIR="$(cd `dirname $0`; pwd)"
-LIB_DIR="$FWDIR/lib"
-
-mkdir -p $LIB_DIR
-
-pushd $FWDIR > /dev/null
-
-# Generate Rd files if devtools is installed
-Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
-
-# Install SparkR to $LIB_DIR
-R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
-
-popd > /dev/null
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/log4j.properties
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/log4j.properties b/sparkr-interpreter/src/main/resources/R/log4j.properties
deleted file mode 100644
index fa3e996..0000000
--- a/sparkr-interpreter/src/main/resources/R/log4j.properties
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License
-#
-
-# Set everything to be logged to the file target/unit-tests.log
-log4j.rootCategory=INFO, file
-log4j.appender.file=org.apache.log4j.FileAppender
-log4j.appender.file.append=true
-log4j.appender.file.file=R/target/unit-tests.log
-log4j.appender.file.layout=org.apache.log4j.PatternLayout
-log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
-
-# Ignore messages below warning level from Jetty, because it's a bit verbose
-log4j.logger.org.eclipse.jetty=WARN
-org.eclipse.jetty.LEVEL=WARN
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/pkg/.lintr
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/pkg/.lintr b/sparkr-interpreter/src/main/resources/R/pkg/.lintr
deleted file mode 100644
index 038236f..0000000
--- a/sparkr-interpreter/src/main/resources/R/pkg/.lintr
+++ /dev/null
@@ -1,2 +0,0 @@
-linters: with_defaults(line_length_linter(100), camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
-exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/pkg/DESCRIPTION b/sparkr-interpreter/src/main/resources/R/pkg/DESCRIPTION
deleted file mode 100644
index d0d7201..0000000
--- a/sparkr-interpreter/src/main/resources/R/pkg/DESCRIPTION
+++ /dev/null
@@ -1,36 +0,0 @@
-Package: SparkR
-Type: Package
-Title: R frontend for Spark
-Version: 1.5.0
-Date: 2013-09-09
-Author: The Apache Software Foundation
-Maintainer: Shivaram Venkataraman <sh...@cs.berkeley.edu>
-Imports:
- methods
-Depends:
- R (>= 3.0),
- methods,
-Suggests:
- testthat
-Description: R frontend for Spark
-License: Apache License (== 2.0)
-Collate:
- 'schema.R'
- 'generics.R'
- 'jobj.R'
- 'RDD.R'
- 'pairRDD.R'
- 'column.R'
- 'group.R'
- 'DataFrame.R'
- 'SQLContext.R'
- 'backend.R'
- 'broadcast.R'
- 'client.R'
- 'context.R'
- 'deserialize.R'
- 'functions.R'
- 'mllib.R'
- 'serialize.R'
- 'sparkR.R'
- 'utils.R'
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/pkg/NAMESPACE b/sparkr-interpreter/src/main/resources/R/pkg/NAMESPACE
deleted file mode 100644
index e7f9770..0000000
--- a/sparkr-interpreter/src/main/resources/R/pkg/NAMESPACE
+++ /dev/null
@@ -1,259 +0,0 @@
-# Imports from base R
-importFrom(methods, setGeneric, setMethod, setOldClass)
-
-# Disable native libraries till we figure out how to package it
-# See SPARKR-7839
-#useDynLib(SparkR, stringHashCode)
-
-# S3 methods exported
-export("sparkR.init")
-export("sparkR.stop")
-export("print.jobj")
-
-# Needed exports for runner
-export("sparkR.connect")
-export("isInstanceOf")
-export("callJMethod")
-export("callJStatic")
-export("newJObject")
-export("removeJObject")
-export("isRemoveMethod")
-export("invokeJava")
-
-# MLlib integration
-exportMethods("glm",
- "predict",
- "summary")
-
-# Job group lifecycle management methods
-export("setJobGroup",
- "clearJobGroup",
- "cancelJobGroup")
-
-exportClasses("DataFrame")
-
-exportMethods("arrange",
- "cache",
- "collect",
- "columns",
- "count",
- "crosstab",
- "describe",
- "dim",
- "distinct",
- "dropna",
- "dtypes",
- "except",
- "explain",
- "fillna",
- "filter",
- "first",
- "group_by",
- "groupBy",
- "head",
- "insertInto",
- "intersect",
- "isLocal",
- "join",
- "limit",
- "merge",
- "mutate",
- "na.omit",
- "names",
- "ncol",
- "nrow",
- "orderBy",
- "persist",
- "printSchema",
- "rbind",
- "registerTempTable",
- "rename",
- "repartition",
- "sample",
- "sample_frac",
- "saveAsParquetFile",
- "saveAsTable",
- "saveDF",
- "schema",
- "select",
- "selectExpr",
- "show",
- "showDF",
- "subset",
- "summarize",
- "summary",
- "take",
- "transform",
- "unionAll",
- "unique",
- "unpersist",
- "where",
- "withColumn",
- "withColumnRenamed",
- "write.df")
-
-exportClasses("Column")
-
-exportMethods("%in%",
- "abs",
- "acos",
- "add_months",
- "alias",
- "approxCountDistinct",
- "asc",
- "ascii",
- "asin",
- "atan",
- "atan2",
- "avg",
- "base64",
- "between",
- "bin",
- "bitwiseNOT",
- "cast",
- "cbrt",
- "ceil",
- "ceiling",
- "concat",
- "concat_ws",
- "contains",
- "conv",
- "cos",
- "cosh",
- "count",
- "countDistinct",
- "crc32",
- "date_add",
- "date_format",
- "date_sub",
- "datediff",
- "dayofmonth",
- "dayofyear",
- "desc",
- "endsWith",
- "exp",
- "explode",
- "expm1",
- "expr",
- "factorial",
- "first",
- "floor",
- "format_number",
- "format_string",
- "from_unixtime",
- "from_utc_timestamp",
- "getField",
- "getItem",
- "greatest",
- "hex",
- "hour",
- "hypot",
- "ifelse",
- "initcap",
- "instr",
- "isNaN",
- "isNotNull",
- "isNull",
- "last",
- "last_day",
- "least",
- "length",
- "levenshtein",
- "like",
- "lit",
- "locate",
- "log",
- "log10",
- "log1p",
- "log2",
- "lower",
- "lpad",
- "ltrim",
- "max",
- "md5",
- "mean",
- "min",
- "minute",
- "month",
- "months_between",
- "n",
- "n_distinct",
- "nanvl",
- "negate",
- "next_day",
- "otherwise",
- "pmod",
- "quarter",
- "rand",
- "randn",
- "regexp_extract",
- "regexp_replace",
- "reverse",
- "rint",
- "rlike",
- "round",
- "rpad",
- "rtrim",
- "second",
- "sha1",
- "sha2",
- "shiftLeft",
- "shiftRight",
- "shiftRightUnsigned",
- "sign",
- "signum",
- "sin",
- "sinh",
- "size",
- "soundex",
- "sqrt",
- "startsWith",
- "substr",
- "substring_index",
- "sum",
- "sumDistinct",
- "tan",
- "tanh",
- "toDegrees",
- "toRadians",
- "to_date",
- "to_utc_timestamp",
- "translate",
- "trim",
- "unbase64",
- "unhex",
- "unix_timestamp",
- "upper",
- "weekofyear",
- "when",
- "year")
-
-exportClasses("GroupedData")
-exportMethods("agg")
-
-export("sparkRSQL.init",
- "sparkRHive.init")
-
-export("cacheTable",
- "clearCache",
- "createDataFrame",
- "createExternalTable",
- "dropTempTable",
- "jsonFile",
- "loadDF",
- "parquetFile",
- "read.df",
- "sql",
- "table",
- "tableNames",
- "tables",
- "uncacheTable")
-
-export("structField",
- "structField.jobj",
- "structField.character",
- "print.structField",
- "structType",
- "structType.jobj",
- "structType.structField",
- "print.structType")
http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/2eb26cd3/sparkr-interpreter/src/main/resources/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/sparkr-interpreter/src/main/resources/R/pkg/R/DataFrame.R b/sparkr-interpreter/src/main/resources/R/pkg/R/DataFrame.R
deleted file mode 100644
index 5605124..0000000
--- a/sparkr-interpreter/src/main/resources/R/pkg/R/DataFrame.R
+++ /dev/null
@@ -1,1819 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# DataFrame.R - DataFrame class and methods implemented in S4 OO classes
-
-#' @include generics.R jobj.R schema.R RDD.R pairRDD.R column.R group.R
-NULL
-
-setOldClass("jobj")
-
-#' @title S4 class that represents a DataFrame
-#' @description DataFrames can be created using functions like
-#' \code{jsonFile}, \code{table} etc.
-#' @rdname DataFrame
-#' @seealso jsonFile, table
-#' @docType class
-#'
-#' @slot env An R environment that stores bookkeeping states of the DataFrame
-#' @slot sdf A Java object reference to the backing Scala DataFrame
-#' @export
-setClass("DataFrame",
- slots = list(env = "environment",
- sdf = "jobj"))
-
-setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
- .Object@env <- new.env()
- .Object@env$isCached <- isCached
-
- .Object@sdf <- sdf
- .Object
-})
-
-#' @rdname DataFrame
-#' @export
-#'
-#' @param sdf A Java object reference to the backing Scala DataFrame
-#' @param isCached TRUE if the dataFrame is cached
-dataFrame <- function(sdf, isCached = FALSE) {
- new("DataFrame", sdf, isCached)
-}
-
-############################ DataFrame Methods ##############################################
-
-#' Print Schema of a DataFrame
-#'
-#' Prints out the schema in tree format
-#'
-#' @param x A SparkSQL DataFrame
-#'
-#' @rdname printSchema
-#' @name printSchema
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' printSchema(df)
-#'}
-setMethod("printSchema",
- signature(x = "DataFrame"),
- function(x) {
- schemaString <- callJMethod(schema(x)$jobj, "treeString")
- cat(schemaString)
- })
-
-#' Get schema object
-#'
-#' Returns the schema of this DataFrame as a structType object.
-#'
-#' @param x A SparkSQL DataFrame
-#'
-#' @rdname schema
-#' @name schema
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' dfSchema <- schema(df)
-#'}
-setMethod("schema",
- signature(x = "DataFrame"),
- function(x) {
- structType(callJMethod(x@sdf, "schema"))
- })
-
-#' Explain
-#'
-#' Print the logical and physical Catalyst plans to the console for debugging.
-#'
-#' @param x A SparkSQL DataFrame
-#' @param extended Logical. If extended is False, explain() only prints the physical plan.
-#' @rdname explain
-#' @name explain
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' explain(df, TRUE)
-#'}
-setMethod("explain",
- signature(x = "DataFrame"),
- function(x, extended = FALSE) {
- queryExec <- callJMethod(x@sdf, "queryExecution")
- if (extended) {
- cat(callJMethod(queryExec, "toString"))
- } else {
- execPlan <- callJMethod(queryExec, "executedPlan")
- cat(callJMethod(execPlan, "toString"))
- }
- })
-
-#' isLocal
-#'
-#' Returns True if the `collect` and `take` methods can be run locally
-#' (without any Spark executors).
-#'
-#' @param x A SparkSQL DataFrame
-#'
-#' @rdname isLocal
-#' @name isLocal
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' isLocal(df)
-#'}
-setMethod("isLocal",
- signature(x = "DataFrame"),
- function(x) {
- callJMethod(x@sdf, "isLocal")
- })
-
-#' showDF
-#'
-#' Print the first numRows rows of a DataFrame
-#'
-#' @param x A SparkSQL DataFrame
-#' @param numRows The number of rows to print. Defaults to 20.
-#'
-#' @rdname showDF
-#' @name showDF
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' showDF(df)
-#'}
-setMethod("showDF",
- signature(x = "DataFrame"),
- function(x, numRows = 20, truncate = TRUE) {
- s <- callJMethod(x@sdf, "showString", numToInt(numRows), truncate)
- cat(s)
- })
-
-#' show
-#'
-#' Print the DataFrame column names and types
-#'
-#' @param x A SparkSQL DataFrame
-#'
-#' @rdname show
-#' @name show
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' df
-#'}
-setMethod("show", "DataFrame",
- function(object) {
- cols <- lapply(dtypes(object), function(l) {
- paste(l, collapse = ":")
- })
- s <- paste(cols, collapse = ", ")
- cat(paste("DataFrame[", s, "]\n", sep = ""))
- })
-
-#' DataTypes
-#'
-#' Return all column names and their data types as a list
-#'
-#' @param x A SparkSQL DataFrame
-#'
-#' @rdname dtypes
-#' @name dtypes
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' dtypes(df)
-#'}
-setMethod("dtypes",
- signature(x = "DataFrame"),
- function(x) {
- lapply(schema(x)$fields(), function(f) {
- c(f$name(), f$dataType.simpleString())
- })
- })
-
-#' Column names
-#'
-#' Return all column names as a list
-#'
-#' @param x A SparkSQL DataFrame
-#'
-#' @rdname columns
-#' @name columns
-#' @aliases names
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' columns(df)
-#'}
-setMethod("columns",
- signature(x = "DataFrame"),
- function(x) {
- sapply(schema(x)$fields(), function(f) {
- f$name()
- })
- })
-
-#' @rdname columns
-#' @name names
-setMethod("names",
- signature(x = "DataFrame"),
- function(x) {
- columns(x)
- })
-
-#' @rdname columns
-#' @name names<-
-setMethod("names<-",
- signature(x = "DataFrame"),
- function(x, value) {
- if (!is.null(value)) {
- sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
- dataFrame(sdf)
- }
- })
-
-#' Register Temporary Table
-#'
-#' Registers a DataFrame as a Temporary Table in the SQLContext
-#'
-#' @param x A SparkSQL DataFrame
-#' @param tableName A character vector containing the name of the table
-#'
-#' @rdname registerTempTable
-#' @name registerTempTable
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' registerTempTable(df, "json_df")
-#' new_df <- sql(sqlContext, "SELECT * FROM json_df")
-#'}
-setMethod("registerTempTable",
- signature(x = "DataFrame", tableName = "character"),
- function(x, tableName) {
- invisible(callJMethod(x@sdf, "registerTempTable", tableName))
- })
-
-#' insertInto
-#'
-#' Insert the contents of a DataFrame into a table registered in the current SQL Context.
-#'
-#' @param x A SparkSQL DataFrame
-#' @param tableName A character vector containing the name of the table
-#' @param overwrite A logical argument indicating whether or not to overwrite
-#' the existing rows in the table.
-#'
-#' @rdname insertInto
-#' @name insertInto
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' df <- read.df(sqlContext, path, "parquet")
-#' df2 <- read.df(sqlContext, path2, "parquet")
-#' registerTempTable(df, "table1")
-#' insertInto(df2, "table1", overwrite = TRUE)
-#'}
-setMethod("insertInto",
- signature(x = "DataFrame", tableName = "character"),
- function(x, tableName, overwrite = FALSE) {
- callJMethod(x@sdf, "insertInto", tableName, overwrite)
- })
-
-#' Cache
-#'
-#' Persist with the default storage level (MEMORY_ONLY).
-#'
-#' @param x A SparkSQL DataFrame
-#'
-#' @rdname cache
-#' @name cache
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' cache(df)
-#'}
-setMethod("cache",
- signature(x = "DataFrame"),
- function(x) {
- cached <- callJMethod(x@sdf, "cache")
- x@env$isCached <- TRUE
- x
- })
-
-#' Persist
-#'
-#' Persist this DataFrame with the specified storage level. For details of the
-#' supported storage levels, refer to
-#' http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence.
-#'
-#' @param x The DataFrame to persist
-#' @rdname persist
-#' @name persist
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' persist(df, "MEMORY_AND_DISK")
-#'}
-setMethod("persist",
- signature(x = "DataFrame", newLevel = "character"),
- function(x, newLevel) {
- callJMethod(x@sdf, "persist", getStorageLevel(newLevel))
- x@env$isCached <- TRUE
- x
- })
-
-#' Unpersist
-#'
-#' Mark this DataFrame as non-persistent, and remove all blocks for it from memory and
-#' disk.
-#'
-#' @param x The DataFrame to unpersist
-#' @param blocking Whether to block until all blocks are deleted
-#' @rdname unpersist-methods
-#' @name unpersist
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' persist(df, "MEMORY_AND_DISK")
-#' unpersist(df)
-#'}
-setMethod("unpersist",
- signature(x = "DataFrame"),
- function(x, blocking = TRUE) {
- callJMethod(x@sdf, "unpersist", blocking)
- x@env$isCached <- FALSE
- x
- })
-
-#' Repartition
-#'
-#' Return a new DataFrame that has exactly numPartitions partitions.
-#'
-#' @param x A SparkSQL DataFrame
-#' @param numPartitions The number of partitions to use.
-#' @rdname repartition
-#' @name repartition
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' newDF <- repartition(df, 2L)
-#'}
-setMethod("repartition",
- signature(x = "DataFrame", numPartitions = "numeric"),
- function(x, numPartitions) {
- sdf <- callJMethod(x@sdf, "repartition", numToInt(numPartitions))
- dataFrame(sdf)
- })
-
-# toJSON
-#
-# Convert the rows of a DataFrame into JSON objects and return an RDD where
-# each element contains a JSON string.
-#
-#@param x A SparkSQL DataFrame
-# @return A StringRRDD of JSON objects
-# @rdname tojson
-# @export
-# @examples
-#\dontrun{
-# sc <- sparkR.init()
-# sqlContext <- sparkRSQL.init(sc)
-# path <- "path/to/file.json"
-# df <- jsonFile(sqlContext, path)
-# newRDD <- toJSON(df)
-#}
-setMethod("toJSON",
- signature(x = "DataFrame"),
- function(x) {
- rdd <- callJMethod(x@sdf, "toJSON")
- jrdd <- callJMethod(rdd, "toJavaRDD")
- RDD(jrdd, serializedMode = "string")
- })
-
-#' saveAsParquetFile
-#'
-#' Save the contents of a DataFrame as a Parquet file, preserving the schema. Files written out
-#' with this method can be read back in as a DataFrame using parquetFile().
-#'
-#' @param x A SparkSQL DataFrame
-#' @param path The directory where the file is saved
-#' @rdname saveAsParquetFile
-#' @name saveAsParquetFile
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' saveAsParquetFile(df, "/tmp/sparkr-tmp/")
-#'}
-setMethod("saveAsParquetFile",
- signature(x = "DataFrame", path = "character"),
- function(x, path) {
- invisible(callJMethod(x@sdf, "saveAsParquetFile", path))
- })
-
-#' Distinct
-#'
-#' Return a new DataFrame containing the distinct rows in this DataFrame.
-#'
-#' @param x A SparkSQL DataFrame
-#' @rdname distinct
-#' @name distinct
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' distinctDF <- distinct(df)
-#'}
-setMethod("distinct",
- signature(x = "DataFrame"),
- function(x) {
- sdf <- callJMethod(x@sdf, "distinct")
- dataFrame(sdf)
- })
-
-#' @title Distinct rows in a DataFrame
-#
-#' @description Returns a new DataFrame containing distinct rows in this DataFrame
-#'
-#' @rdname unique
-#' @name unique
-#' @aliases distinct
-setMethod("unique",
- signature(x = "DataFrame"),
- function(x) {
- distinct(x)
- })
-
-#' Sample
-#'
-#' Return a sampled subset of this DataFrame using a random seed.
-#'
-#' @param x A SparkSQL DataFrame
-#' @param withReplacement Sampling with replacement or not
-#' @param fraction The (rough) sample target fraction
-#' @rdname sample
-#' @aliases sample_frac
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' collect(sample(df, FALSE, 0.5))
-#' collect(sample(df, TRUE, 0.5))
-#'}
-setMethod("sample",
- # TODO : Figure out how to send integer as java.lang.Long to JVM so
- # we can send seed as an argument through callJMethod
- signature(x = "DataFrame", withReplacement = "logical",
- fraction = "numeric"),
- function(x, withReplacement, fraction) {
- if (fraction < 0.0) stop(cat("Negative fraction value:", fraction))
- sdf <- callJMethod(x@sdf, "sample", withReplacement, fraction)
- dataFrame(sdf)
- })
-
-#' @rdname sample
-#' @name sample_frac
-setMethod("sample_frac",
- signature(x = "DataFrame", withReplacement = "logical",
- fraction = "numeric"),
- function(x, withReplacement, fraction) {
- sample(x, withReplacement, fraction)
- })
-
-#' Count
-#'
-#' Returns the number of rows in a DataFrame
-#'
-#' @param x A SparkSQL DataFrame
-#'
-#' @rdname count
-#' @name count
-#' @aliases nrow
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' count(df)
-#' }
-setMethod("count",
- signature(x = "DataFrame"),
- function(x) {
- callJMethod(x@sdf, "count")
- })
-
-#' @title Number of rows for a DataFrame
-#' @description Returns number of rows in a DataFrames
-#'
-#' @name nrow
-#'
-#' @rdname nrow
-#' @aliases count
-setMethod("nrow",
- signature(x = "DataFrame"),
- function(x) {
- count(x)
- })
-
-#' Returns the number of columns in a DataFrame
-#'
-#' @param x a SparkSQL DataFrame
-#'
-#' @rdname ncol
-#' @name ncol
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' ncol(df)
-#' }
-setMethod("ncol",
- signature(x = "DataFrame"),
- function(x) {
- length(columns(x))
- })
-
-#' Returns the dimensions (number of rows and columns) of a DataFrame
-#' @param x a SparkSQL DataFrame
-#'
-#' @rdname dim
-#' @name dim
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' dim(df)
-#' }
-setMethod("dim",
- signature(x = "DataFrame"),
- function(x) {
- c(count(x), ncol(x))
- })
-
-#' Collects all the elements of a Spark DataFrame and coerces them into an R data.frame.
-#'
-#' @param x A SparkSQL DataFrame
-#' @param stringsAsFactors (Optional) A logical indicating whether or not string columns
-#' should be converted to factors. FALSE by default.
-#' @rdname collect
-#' @name collect
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' collected <- collect(df)
-#' firstName <- collected[[1]]$name
-#' }
-setMethod("collect",
- signature(x = "DataFrame"),
- function(x, stringsAsFactors = FALSE) {
- # listCols is a list of raw vectors, one per column
- listCols <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "dfToCols", x@sdf)
- cols <- lapply(listCols, function(col) {
- objRaw <- rawConnection(col)
- numRows <- readInt(objRaw)
- col <- readCol(objRaw, numRows)
- close(objRaw)
- col
- })
- names(cols) <- columns(x)
- do.call(cbind.data.frame, list(cols, stringsAsFactors = stringsAsFactors))
- })
-
-#' Limit
-#'
-#' Limit the resulting DataFrame to the number of rows specified.
-#'
-#' @param x A SparkSQL DataFrame
-#' @param num The number of rows to return
-#' @return A new DataFrame containing the number of rows specified.
-#'
-#' @rdname limit
-#' @name limit
-#' @export
-#' @examples
-#' \dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' limitedDF <- limit(df, 10)
-#' }
-setMethod("limit",
- signature(x = "DataFrame", num = "numeric"),
- function(x, num) {
- res <- callJMethod(x@sdf, "limit", as.integer(num))
- dataFrame(res)
- })
-
-#' Take the first NUM rows of a DataFrame and return a the results as a data.frame
-#'
-#' @rdname take
-#' @name take
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' take(df, 2)
-#' }
-setMethod("take",
- signature(x = "DataFrame", num = "numeric"),
- function(x, num) {
- limited <- limit(x, num)
- collect(limited)
- })
-
-#' Head
-#'
-#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL,
-#' then head() returns the first 6 rows in keeping with the current data.frame
-#' convention in R.
-#'
-#' @param x A SparkSQL DataFrame
-#' @param num The number of rows to return. Default is 6.
-#' @return A data.frame
-#'
-#' @rdname head
-#' @name head
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' head(df)
-#' }
-setMethod("head",
- signature(x = "DataFrame"),
- function(x, num = 6L) {
- # Default num is 6L in keeping with R's data.frame convention
- take(x, num)
- })
-
-#' Return the first row of a DataFrame
-#'
-#' @param x A SparkSQL DataFrame
-#'
-#' @rdname first
-#' @name first
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' first(df)
-#' }
-setMethod("first",
- signature(x = "DataFrame"),
- function(x) {
- take(x, 1)
- })
-
-# toRDD
-#
-# Converts a Spark DataFrame to an RDD while preserving column names.
-#
-# @param x A Spark DataFrame
-#
-# @rdname DataFrame
-# @export
-# @examples
-#\dontrun{
-# sc <- sparkR.init()
-# sqlContext <- sparkRSQL.init(sc)
-# path <- "path/to/file.json"
-# df <- jsonFile(sqlContext, path)
-# rdd <- toRDD(df)
-# }
-setMethod("toRDD",
- signature(x = "DataFrame"),
- function(x) {
- jrdd <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "dfToRowRDD", x@sdf)
- colNames <- callJMethod(x@sdf, "columns")
- rdd <- RDD(jrdd, serializedMode = "row")
- lapply(rdd, function(row) {
- names(row) <- colNames
- row
- })
- })
-
-#' GroupBy
-#'
-#' Groups the DataFrame using the specified columns, so we can run aggregation on them.
-#'
-#' @param x a DataFrame
-#' @return a GroupedData
-#' @seealso GroupedData
-#' @aliases group_by
-#' @rdname groupBy
-#' @name groupBy
-#' @export
-#' @examples
-#' \dontrun{
-#' # Compute the average for all numeric columns grouped by department.
-#' avg(groupBy(df, "department"))
-#'
-#' # Compute the max age and average salary, grouped by department and gender.
-#' agg(groupBy(df, "department", "gender"), salary="avg", "age" -> "max")
-#' }
-setMethod("groupBy",
- signature(x = "DataFrame"),
- function(x, ...) {
- cols <- list(...)
- if (length(cols) >= 1 && class(cols[[1]]) == "character") {
- sgd <- callJMethod(x@sdf, "groupBy", cols[[1]], listToSeq(cols[-1]))
- } else {
- jcol <- lapply(cols, function(c) { c@jc })
- sgd <- callJMethod(x@sdf, "groupBy", listToSeq(jcol))
- }
- groupedData(sgd)
- })
-
-#' @rdname groupBy
-#' @name group_by
-setMethod("group_by",
- signature(x = "DataFrame"),
- function(x, ...) {
- groupBy(x, ...)
- })
-
-#' Summarize data across columns
-#'
-#' Compute aggregates by specifying a list of columns
-#'
-#' @param x a DataFrame
-#' @rdname agg
-#' @name agg
-#' @aliases summarize
-#' @export
-setMethod("agg",
- signature(x = "DataFrame"),
- function(x, ...) {
- agg(groupBy(x), ...)
- })
-
-#' @rdname agg
-#' @name summarize
-setMethod("summarize",
- signature(x = "DataFrame"),
- function(x, ...) {
- agg(x, ...)
- })
-
-
-############################## RDD Map Functions ##################################
-# All of the following functions mirror the existing RDD map functions, #
-# but allow for use with DataFrames by first converting to an RRDD before calling #
-# the requested map function. #
-###################################################################################
-
-# @rdname lapply
-setMethod("lapply",
- signature(X = "DataFrame", FUN = "function"),
- function(X, FUN) {
- rdd <- toRDD(X)
- lapply(rdd, FUN)
- })
-
-# @rdname lapply
-setMethod("map",
- signature(X = "DataFrame", FUN = "function"),
- function(X, FUN) {
- lapply(X, FUN)
- })
-
-# @rdname flatMap
-setMethod("flatMap",
- signature(X = "DataFrame", FUN = "function"),
- function(X, FUN) {
- rdd <- toRDD(X)
- flatMap(rdd, FUN)
- })
-
-# @rdname lapplyPartition
-setMethod("lapplyPartition",
- signature(X = "DataFrame", FUN = "function"),
- function(X, FUN) {
- rdd <- toRDD(X)
- lapplyPartition(rdd, FUN)
- })
-
-# @rdname lapplyPartition
-setMethod("mapPartitions",
- signature(X = "DataFrame", FUN = "function"),
- function(X, FUN) {
- lapplyPartition(X, FUN)
- })
-
-# @rdname foreach
-setMethod("foreach",
- signature(x = "DataFrame", func = "function"),
- function(x, func) {
- rdd <- toRDD(x)
- foreach(rdd, func)
- })
-
-# @rdname foreach
-setMethod("foreachPartition",
- signature(x = "DataFrame", func = "function"),
- function(x, func) {
- rdd <- toRDD(x)
- foreachPartition(rdd, func)
- })
-
-
-############################## SELECT ##################################
-
-getColumn <- function(x, c) {
- column(callJMethod(x@sdf, "col", c))
-}
-
-#' @rdname select
-#' @name $
-setMethod("$", signature(x = "DataFrame"),
- function(x, name) {
- getColumn(x, name)
- })
-
-#' @rdname select
-#' @name $<-
-setMethod("$<-", signature(x = "DataFrame"),
- function(x, name, value) {
- stopifnot(class(value) == "Column" || is.null(value))
- cols <- columns(x)
- if (name %in% cols) {
- if (is.null(value)) {
- cols <- Filter(function(c) { c != name }, cols)
- }
- cols <- lapply(cols, function(c) {
- if (c == name) {
- alias(value, name)
- } else {
- col(c)
- }
- })
- nx <- select(x, cols)
- } else {
- if (is.null(value)) {
- return(x)
- }
- nx <- withColumn(x, name, value)
- }
- x@sdf <- nx@sdf
- x
- })
-
-setClassUnion("numericOrcharacter", c("numeric", "character"))
-
-#' @rdname subset
-#' @name [[
-setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
- function(x, i) {
- if (is.numeric(i)) {
- cols <- columns(x)
- i <- cols[[i]]
- }
- getColumn(x, i)
- })
-
-#' @rdname subset
-#' @name [
-setMethod("[", signature(x = "DataFrame", i = "missing"),
- function(x, i, j, ...) {
- if (is.numeric(j)) {
- cols <- columns(x)
- j <- cols[j]
- }
- if (length(j) > 1) {
- j <- as.list(j)
- }
- select(x, j)
- })
-
-#' @rdname subset
-#' @name [
-setMethod("[", signature(x = "DataFrame", i = "Column"),
- function(x, i, j, ...) {
- # It could handle i as "character" but it seems confusing and not required
- # https://stat.ethz.ch/R-manual/R-devel/library/base/html/Extract.data.frame.html
- filtered <- filter(x, i)
- if (!missing(j)) {
- filtered[, j, ...]
- } else {
- filtered
- }
- })
-
-#' Subset
-#'
-#' Return subsets of DataFrame according to given conditions
-#' @param x A DataFrame
-#' @param subset A logical expression to filter on rows
-#' @param select expression for the single Column or a list of columns to select from the DataFrame
-#' @return A new DataFrame containing only the rows that meet the condition with selected columns
-#' @export
-#' @rdname subset
-#' @name subset
-#' @aliases [
-#' @family subsetting functions
-#' @examples
-#' \dontrun{
-#' # Columns can be selected using `[[` and `[`
-#' df[[2]] == df[["age"]]
-#' df[,2] == df[,"age"]
-#' df[,c("name", "age")]
-#' # Or to filter rows
-#' df[df$age > 20,]
-#' # DataFrame can be subset on both rows and Columns
-#' df[df$name == "Smith", c(1,2)]
-#' df[df$age %in% c(19, 30), 1:2]
-#' subset(df, df$age %in% c(19, 30), 1:2)
-#' subset(df, df$age %in% c(19), select = c(1,2))
-#' }
-setMethod("subset", signature(x = "DataFrame"),
- function(x, subset, select, ...) {
- x[subset, select, ...]
- })
-
-#' Select
-#'
-#' Selects a set of columns with names or Column expressions.
-#' @param x A DataFrame
-#' @param col A list of columns or single Column or name
-#' @return A new DataFrame with selected columns
-#' @export
-#' @rdname select
-#' @name select
-#' @family subsetting functions
-#' @examples
-#' \dontrun{
-#' select(df, "*")
-#' select(df, "col1", "col2")
-#' select(df, df$name, df$age + 1)
-#' select(df, c("col1", "col2"))
-#' select(df, list(df$name, df$age + 1))
-#' # Similar to R data frames columns can also be selected using `$`
-#' df$age
-#' }
-setMethod("select", signature(x = "DataFrame", col = "character"),
- function(x, col, ...) {
- sdf <- callJMethod(x@sdf, "select", col, toSeq(...))
- dataFrame(sdf)
- })
-
-#' @rdname select
-#' @export
-setMethod("select", signature(x = "DataFrame", col = "Column"),
- function(x, col, ...) {
- jcols <- lapply(list(col, ...), function(c) {
- c@jc
- })
- sdf <- callJMethod(x@sdf, "select", listToSeq(jcols))
- dataFrame(sdf)
- })
-
-#' @rdname select
-#' @export
-setMethod("select",
- signature(x = "DataFrame", col = "list"),
- function(x, col) {
- cols <- lapply(col, function(c) {
- if (class(c) == "Column") {
- c@jc
- } else {
- col(c)@jc
- }
- })
- sdf <- callJMethod(x@sdf, "select", listToSeq(cols))
- dataFrame(sdf)
- })
-
-#' SelectExpr
-#'
-#' Select from a DataFrame using a set of SQL expressions.
-#'
-#' @param x A DataFrame to be selected from.
-#' @param expr A string containing a SQL expression
-#' @param ... Additional expressions
-#' @return A DataFrame
-#' @rdname selectExpr
-#' @name selectExpr
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' selectExpr(df, "col1", "(col2 * 5) as newCol")
-#' }
-setMethod("selectExpr",
- signature(x = "DataFrame", expr = "character"),
- function(x, expr, ...) {
- exprList <- list(expr, ...)
- sdf <- callJMethod(x@sdf, "selectExpr", listToSeq(exprList))
- dataFrame(sdf)
- })
-
-#' WithColumn
-#'
-#' Return a new DataFrame with the specified column added.
-#'
-#' @param x A DataFrame
-#' @param colName A string containing the name of the new column.
-#' @param col A Column expression.
-#' @return A DataFrame with the new column added.
-#' @rdname withColumn
-#' @name withColumn
-#' @aliases mutate transform
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' newDF <- withColumn(df, "newCol", df$col1 * 5)
-#' }
-setMethod("withColumn",
- signature(x = "DataFrame", colName = "character", col = "Column"),
- function(x, colName, col) {
- select(x, x$"*", alias(col, colName))
- })
-
-#' Mutate
-#'
-#' Return a new DataFrame with the specified columns added.
-#'
-#' @param .data A DataFrame
-#' @param col a named argument of the form name = col
-#' @return A new DataFrame with the new columns added.
-#' @rdname withColumn
-#' @name mutate
-#' @aliases withColumn transform
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' newDF <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2)
-#' names(newDF) # Will contain newCol, newCol2
-#' newDF2 <- transform(df, newCol = df$col1 / 5, newCol2 = df$col1 * 2)
-#' }
-setMethod("mutate",
- signature(.data = "DataFrame"),
- function(.data, ...) {
- x <- .data
- cols <- list(...)
- stopifnot(length(cols) > 0)
- stopifnot(class(cols[[1]]) == "Column")
- ns <- names(cols)
- if (!is.null(ns)) {
- for (n in ns) {
- if (n != "") {
- cols[[n]] <- alias(cols[[n]], n)
- }
- }
- }
- do.call(select, c(x, x$"*", cols))
- })
-
-#' @export
-#' @rdname withColumn
-#' @name transform
-#' @aliases withColumn mutate
-setMethod("transform",
- signature(`_data` = "DataFrame"),
- function(`_data`, ...) {
- mutate(`_data`, ...)
- })
-
-#' WithColumnRenamed
-#'
-#' Rename an existing column in a DataFrame.
-#'
-#' @param x A DataFrame
-#' @param existingCol The name of the column you want to change.
-#' @param newCol The new column name.
-#' @return A DataFrame with the column name changed.
-#' @rdname withColumnRenamed
-#' @name withColumnRenamed
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' newDF <- withColumnRenamed(df, "col1", "newCol1")
-#' }
-setMethod("withColumnRenamed",
- signature(x = "DataFrame", existingCol = "character", newCol = "character"),
- function(x, existingCol, newCol) {
- cols <- lapply(columns(x), function(c) {
- if (c == existingCol) {
- alias(col(c), newCol)
- } else {
- col(c)
- }
- })
- select(x, cols)
- })
-
-#' Rename
-#'
-#' Rename an existing column in a DataFrame.
-#'
-#' @param x A DataFrame
-#' @param newCol A named pair of the form new_column_name = existing_column
-#' @return A DataFrame with the column name changed.
-#' @rdname withColumnRenamed
-#' @name rename
-#' @aliases withColumnRenamed
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' newDF <- rename(df, col1 = df$newCol1)
-#' }
-setMethod("rename",
- signature(x = "DataFrame"),
- function(x, ...) {
- renameCols <- list(...)
- stopifnot(length(renameCols) > 0)
- stopifnot(class(renameCols[[1]]) == "Column")
- newNames <- names(renameCols)
- oldNames <- lapply(renameCols, function(col) {
- callJMethod(col@jc, "toString")
- })
- cols <- lapply(columns(x), function(c) {
- if (c %in% oldNames) {
- alias(col(c), newNames[[match(c, oldNames)]])
- } else {
- col(c)
- }
- })
- select(x, cols)
- })
-
-setClassUnion("characterOrColumn", c("character", "Column"))
-
-#' Arrange
-#'
-#' Sort a DataFrame by the specified column(s).
-#'
-#' @param x A DataFrame to be sorted.
-#' @param col Either a Column object or character vector indicating the field to sort on
-#' @param ... Additional sorting fields
-#' @return A DataFrame where all elements are sorted.
-#' @rdname arrange
-#' @name arrange
-#' @aliases orderby
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' arrange(df, df$col1)
-#' arrange(df, "col1")
-#' arrange(df, asc(df$col1), desc(abs(df$col2)))
-#' }
-setMethod("arrange",
- signature(x = "DataFrame", col = "characterOrColumn"),
- function(x, col, ...) {
- if (class(col) == "character") {
- sdf <- callJMethod(x@sdf, "sort", col, toSeq(...))
- } else if (class(col) == "Column") {
- jcols <- lapply(list(col, ...), function(c) {
- c@jc
- })
- sdf <- callJMethod(x@sdf, "sort", listToSeq(jcols))
- }
- dataFrame(sdf)
- })
-
-#' @rdname arrange
-#' @name orderby
-setMethod("orderBy",
- signature(x = "DataFrame", col = "characterOrColumn"),
- function(x, col) {
- arrange(x, col)
- })
-
-#' Filter
-#'
-#' Filter the rows of a DataFrame according to a given condition.
-#'
-#' @param x A DataFrame to be sorted.
-#' @param condition The condition to filter on. This may either be a Column expression
-#' or a string containing a SQL statement
-#' @return A DataFrame containing only the rows that meet the condition.
-#' @rdname filter
-#' @name filter
-#' @family subsetting functions
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' filter(df, "col1 > 0")
-#' filter(df, df$col2 != "abcdefg")
-#' }
-setMethod("filter",
- signature(x = "DataFrame", condition = "characterOrColumn"),
- function(x, condition) {
- if (class(condition) == "Column") {
- condition <- condition@jc
- }
- sdf <- callJMethod(x@sdf, "filter", condition)
- dataFrame(sdf)
- })
-
-#' @rdname filter
-#' @name where
-setMethod("where",
- signature(x = "DataFrame", condition = "characterOrColumn"),
- function(x, condition) {
- filter(x, condition)
- })
-
-#' Join
-#'
-#' Join two DataFrames based on the given join expression.
-#'
-#' @param x A Spark DataFrame
-#' @param y A Spark DataFrame
-#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
-#' Column expression. If joinExpr is omitted, join() wil perform a Cartesian join
-#' @param joinType The type of join to perform. The following join types are available:
-#' 'inner', 'outer', 'left_outer', 'right_outer', 'semijoin'. The default joinType is "inner".
-#' @return A DataFrame containing the result of the join operation.
-#' @rdname join
-#' @name join
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' df1 <- jsonFile(sqlContext, path)
-#' df2 <- jsonFile(sqlContext, path2)
-#' join(df1, df2) # Performs a Cartesian
-#' join(df1, df2, df1$col1 == df2$col2) # Performs an inner join based on expression
-#' join(df1, df2, df1$col1 == df2$col2, "right_outer")
-#' }
-setMethod("join",
- signature(x = "DataFrame", y = "DataFrame"),
- function(x, y, joinExpr = NULL, joinType = NULL) {
- if (is.null(joinExpr)) {
- sdf <- callJMethod(x@sdf, "join", y@sdf)
- } else {
- if (class(joinExpr) != "Column") stop("joinExpr must be a Column")
- if (is.null(joinType)) {
- sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc)
- } else {
- if (joinType %in% c("inner", "outer", "left_outer", "right_outer", "semijoin")) {
- sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, joinType)
- } else {
- stop("joinType must be one of the following types: ",
- "'inner', 'outer', 'left_outer', 'right_outer', 'semijoin'")
- }
- }
- }
- dataFrame(sdf)
- })
-
-#' @rdname merge
-#' @name merge
-#' @aliases join
-setMethod("merge",
- signature(x = "DataFrame", y = "DataFrame"),
- function(x, y, joinExpr = NULL, joinType = NULL, ...) {
- join(x, y, joinExpr, joinType)
- })
-
-
-#' UnionAll
-#'
-#' Return a new DataFrame containing the union of rows in this DataFrame
-#' and another DataFrame. This is equivalent to `UNION ALL` in SQL.
-#' Note that this does not remove duplicate rows across the two DataFrames.
-#'
-#' @param x A Spark DataFrame
-#' @param y A Spark DataFrame
-#' @return A DataFrame containing the result of the union.
-#' @rdname unionAll
-#' @name unionAll
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' df1 <- jsonFile(sqlContext, path)
-#' df2 <- jsonFile(sqlContext, path2)
-#' unioned <- unionAll(df, df2)
-#' }
-setMethod("unionAll",
- signature(x = "DataFrame", y = "DataFrame"),
- function(x, y) {
- unioned <- callJMethod(x@sdf, "unionAll", y@sdf)
- dataFrame(unioned)
- })
-
-#' @title Union two or more DataFrames
-#
-#' @description Returns a new DataFrame containing rows of all parameters.
-#
-#' @rdname rbind
-#' @name rbind
-#' @aliases unionAll
-setMethod("rbind",
- signature(... = "DataFrame"),
- function(x, ..., deparse.level = 1) {
- if (nargs() == 3) {
- unionAll(x, ...)
- } else {
- unionAll(x, Recall(..., deparse.level = 1))
- }
- })
-
-#' Intersect
-#'
-#' Return a new DataFrame containing rows only in both this DataFrame
-#' and another DataFrame. This is equivalent to `INTERSECT` in SQL.
-#'
-#' @param x A Spark DataFrame
-#' @param y A Spark DataFrame
-#' @return A DataFrame containing the result of the intersect.
-#' @rdname intersect
-#' @name intersect
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' df1 <- jsonFile(sqlContext, path)
-#' df2 <- jsonFile(sqlContext, path2)
-#' intersectDF <- intersect(df, df2)
-#' }
-setMethod("intersect",
- signature(x = "DataFrame", y = "DataFrame"),
- function(x, y) {
- intersected <- callJMethod(x@sdf, "intersect", y@sdf)
- dataFrame(intersected)
- })
-
-#' except
-#'
-#' Return a new DataFrame containing rows in this DataFrame
-#' but not in another DataFrame. This is equivalent to `EXCEPT` in SQL.
-#'
-#' @param x A Spark DataFrame
-#' @param y A Spark DataFrame
-#' @return A DataFrame containing the result of the except operation.
-#' @rdname except
-#' @name except
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' df1 <- jsonFile(sqlContext, path)
-#' df2 <- jsonFile(sqlContext, path2)
-#' exceptDF <- except(df, df2)
-#' }
-#' @rdname except
-#' @export
-setMethod("except",
- signature(x = "DataFrame", y = "DataFrame"),
- function(x, y) {
- excepted <- callJMethod(x@sdf, "except", y@sdf)
- dataFrame(excepted)
- })
-
-#' Save the contents of the DataFrame to a data source
-#'
-#' The data source is specified by the `source` and a set of options (...).
-#' If `source` is not specified, the default data source configured by
-#' spark.sql.sources.default will be used.
-#'
-#' Additionally, mode is used to specify the behavior of the save operation when
-#' data already exists in the data source. There are four modes:
-#' append: Contents of this DataFrame are expected to be appended to existing data.
-#' overwrite: Existing data is expected to be overwritten by the contents of
-# this DataFrame.
-#' error: An exception is expected to be thrown.
-#' ignore: The save operation is expected to not save the contents of the DataFrame
-# and to not change the existing data.
-#'
-#' @param df A SparkSQL DataFrame
-#' @param path A name for the table
-#' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore'
-#'
-#' @rdname write.df
-#' @name write.df
-#' @aliases saveDF
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' write.df(df, "myfile", "parquet", "overwrite")
-#' }
-setMethod("write.df",
- signature(df = "DataFrame", path = "character"),
- function(df, path, source = NULL, mode = "append", ...){
- if (is.null(source)) {
- sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
- source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
- "org.apache.spark.sql.parquet")
- }
- allModes <- c("append", "overwrite", "error", "ignore")
- # nolint start
- if (!(mode %in% allModes)) {
- stop('mode should be one of "append", "overwrite", "error", "ignore"')
- }
- # nolint end
- jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
- options <- varargsToEnv(...)
- if (!is.null(path)) {
- options[["path"]] <- path
- }
- callJMethod(df@sdf, "save", source, jmode, options)
- })
-
-#' @rdname write.df
-#' @name saveDF
-#' @export
-setMethod("saveDF",
- signature(df = "DataFrame", path = "character"),
- function(df, path, source = NULL, mode = "append", ...){
- write.df(df, path, source, mode, ...)
- })
-
-#' saveAsTable
-#'
-#' Save the contents of the DataFrame to a data source as a table
-#'
-#' The data source is specified by the `source` and a set of options (...).
-#' If `source` is not specified, the default data source configured by
-#' spark.sql.sources.default will be used.
-#'
-#' Additionally, mode is used to specify the behavior of the save operation when
-#' data already exists in the data source. There are four modes:
-#' append: Contents of this DataFrame are expected to be appended to existing data.
-#' overwrite: Existing data is expected to be overwritten by the contents of
-# this DataFrame.
-#' error: An exception is expected to be thrown.
-#' ignore: The save operation is expected to not save the contents of the DataFrame
-# and to not change the existing data.
-#'
-#' @param df A SparkSQL DataFrame
-#' @param tableName A name for the table
-#' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore'
-#'
-#' @rdname saveAsTable
-#' @name saveAsTable
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' saveAsTable(df, "myfile")
-#' }
-setMethod("saveAsTable",
- signature(df = "DataFrame", tableName = "character", source = "character",
- mode = "character"),
- function(df, tableName, source = NULL, mode="append", ...){
- if (is.null(source)) {
- sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
- source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
- "org.apache.spark.sql.parquet")
- }
- allModes <- c("append", "overwrite", "error", "ignore")
- # nolint start
- if (!(mode %in% allModes)) {
- stop('mode should be one of "append", "overwrite", "error", "ignore"')
- }
- # nolint end
- jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
- options <- varargsToEnv(...)
- callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, options)
- })
-
-#' describe
-#'
-#' Computes statistics for numeric columns.
-#' If no columns are given, this function computes statistics for all numerical columns.
-#'
-#' @param x A DataFrame to be computed.
-#' @param col A string of name
-#' @param ... Additional expressions
-#' @return A DataFrame
-#' @rdname describe
-#' @name describe
-#' @aliases summary
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlContext, path)
-#' describe(df)
-#' describe(df, "col1")
-#' describe(df, "col1", "col2")
-#' }
-setMethod("describe",
- signature(x = "DataFrame", col = "character"),
- function(x, col, ...) {
- colList <- list(col, ...)
- sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
- dataFrame(sdf)
- })
-
-#' @rdname describe
-#' @name describe
-setMethod("describe",
- signature(x = "DataFrame"),
- function(x) {
- colList <- as.list(c(columns(x)))
- sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
- dataFrame(sdf)
- })
-
-#' @title Summary
-#'
-#' @description Computes statistics for numeric columns of the DataFrame
-#'
-#' @rdname summary
-#' @name summary
-setMethod("summary",
- signature(x = "DataFrame"),
- function(x) {
- describe(x)
- })
-
-
-#' dropna
-#'
-#' Returns a new DataFrame omitting rows with null values.
-#'
-#' @param x A SparkSQL DataFrame.
-#' @param how "any" or "all".
-#' if "any", drop a row if it contains any nulls.
-#' if "all", drop a row only if all its values are null.
-#' if minNonNulls is specified, how is ignored.
-#' @param minNonNulls If specified, drop rows that have less than
-#' minNonNulls non-null values.
-#' This overwrites the how parameter.
-#' @param cols Optional list of column names to consider.
-#' @return A DataFrame
-#'
-#' @rdname nafunctions
-#' @name dropna
-#' @aliases na.omit
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' dropna(df)
-#' }
-setMethod("dropna",
- signature(x = "DataFrame"),
- function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
- how <- match.arg(how)
- if (is.null(cols)) {
- cols <- columns(x)
- }
- if (is.null(minNonNulls)) {
- minNonNulls <- if (how == "any") { length(cols) } else { 1 }
- }
-
- naFunctions <- callJMethod(x@sdf, "na")
- sdf <- callJMethod(naFunctions, "drop",
- as.integer(minNonNulls), listToSeq(as.list(cols)))
- dataFrame(sdf)
- })
-
-#' @rdname nafunctions
-#' @name na.omit
-#' @export
-setMethod("na.omit",
- signature(object = "DataFrame"),
- function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
- dropna(object, how, minNonNulls, cols)
- })
-
-#' fillna
-#'
-#' Replace null values.
-#'
-#' @param x A SparkSQL DataFrame.
-#' @param value Value to replace null values with.
-#' Should be an integer, numeric, character or named list.
-#' If the value is a named list, then cols is ignored and
-#' value must be a mapping from column name (character) to
-#' replacement value. The replacement value must be an
-#' integer, numeric or character.
-#' @param cols optional list of column names to consider.
-#' Columns specified in cols that do not have matching data
-#' type are ignored. For example, if value is a character, and
-#' subset contains a non-character column, then the non-character
-#' column is simply ignored.
-#' @return A DataFrame
-#'
-#' @rdname nafunctions
-#' @name fillna
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' fillna(df, 1)
-#' fillna(df, list("age" = 20, "name" = "unknown"))
-#' }
-setMethod("fillna",
- signature(x = "DataFrame"),
- function(x, value, cols = NULL) {
- if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
- stop("value should be an integer, numeric, charactor or named list.")
- }
-
- if (class(value) == "list") {
- # Check column names in the named list
- colNames <- names(value)
- if (length(colNames) == 0 || !all(colNames != "")) {
- stop("value should be an a named list with each name being a column name.")
- }
-
- # Convert to the named list to an environment to be passed to JVM
- valueMap <- new.env()
- for (col in colNames) {
- # Check each item in the named list is of valid type
- v <- value[[col]]
- if (!(class(v) %in% c("integer", "numeric", "character"))) {
- stop("Each item in value should be an integer, numeric or charactor.")
- }
- valueMap[[col]] <- v
- }
-
- # When value is a named list, caller is expected not to pass in cols
- if (!is.null(cols)) {
- warning("When value is a named list, cols is ignored!")
- cols <- NULL
- }
-
- value <- valueMap
- } else if (is.integer(value)) {
- # Cast an integer to a numeric
- value <- as.numeric(value)
- }
-
- naFunctions <- callJMethod(x@sdf, "na")
- sdf <- if (length(cols) == 0) {
- callJMethod(naFunctions, "fill", value)
- } else {
- callJMethod(naFunctions, "fill", value, listToSeq(as.list(cols)))
- }
- dataFrame(sdf)
- })
-
-#' crosstab
-#'
-#' Computes a pair-wise frequency table of the given columns. Also known as a contingency
-#' table. The number of distinct values for each column should be less than 1e4. At most 1e6
-#' non-zero pair frequencies will be returned.
-#'
-#' @param col1 name of the first column. Distinct items will make the first item of each row.
-#' @param col2 name of the second column. Distinct items will make the column names of the output.
-#' @return a local R data.frame representing the contingency table. The first column of each row
-#' will be the distinct values of `col1` and the column names will be the distinct values
-#' of `col2`. The name of the first column will be `$col1_$col2`. Pairs that have no
-#' occurrences will have zero as their counts.
-#'
-#' @rdname statfunctions
-#' @name crosstab
-#' @export
-#' @examples
-#' \dontrun{
-#' df <- jsonFile(sqlCtx, "/path/to/file.json")
-#' ct = crosstab(df, "title", "gender")
-#' }
-setMethod("crosstab",
- signature(x = "DataFrame", col1 = "character", col2 = "character"),
- function(x, col1, col2) {
- statFunctions <- callJMethod(x@sdf, "stat")
- sct <- callJMethod(statFunctions, "crosstab", col1, col2)
- collect(dataFrame(sct))
- })