You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sh...@apache.org on 2015/05/23 09:04:26 UTC

spark git commit: [SPARK-6811] Copy SparkR lib in make-distribution.sh

Repository: spark
Updated Branches:
  refs/heads/master 7af3818c6 -> a40bca011


[SPARK-6811] Copy SparkR lib in make-distribution.sh

This change also removes the native libraries from SparkR to make sure our distribution works across platforms.

Tested by building on Mac and running on Amazon Linux (CentOS) and a Windows VM, and vice versa (built on Linux, run on Mac).

I will also test this with YARN soon and update this PR.

Author: Shivaram Venkataraman <sh...@cs.berkeley.edu>

Closes #6373 from shivaram/sparkr-binary and squashes the following commits:

ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR. Also include the built SparkR package in make-distribution.sh


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a40bca01
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a40bca01
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a40bca01

Branch: refs/heads/master
Commit: a40bca0111de45763c3ef4270afb2185c16b8f95
Parents: 7af3818
Author: Shivaram Venkataraman <sh...@cs.berkeley.edu>
Authored: Sat May 23 00:04:01 2015 -0700
Committer: Shivaram Venkataraman <sh...@cs.berkeley.edu>
Committed: Sat May 23 00:04:01 2015 -0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE                     |  5 +++-
 R/pkg/R/utils.R                     | 38 ++++++++++++++++++++++++-
 R/pkg/src-native/Makefile           | 27 ++++++++++++++++++
 R/pkg/src-native/Makefile.win       | 27 ++++++++++++++++++
 R/pkg/src-native/string_hash_code.c | 49 ++++++++++++++++++++++++++++++++
 R/pkg/src/Makefile                  | 27 ------------------
 R/pkg/src/Makefile.win              | 27 ------------------
 R/pkg/src/string_hash_code.c        | 49 --------------------------------
 make-distribution.sh                |  2 ++
 9 files changed, 146 insertions(+), 105 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 64ffdcf..411126a 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -1,6 +1,9 @@
 # Imports from base R
 importFrom(methods, setGeneric, setMethod, setOldClass)
-useDynLib(SparkR, stringHashCode)
+
+# Disable native libraries till we figure out how to package it
+# See SPARK-7839
+#useDynLib(SparkR, stringHashCode)
 
 # S3 methods exported
 export("sparkR.init")

http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/R/utils.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 0e7b7bd..69b2700 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -122,13 +122,49 @@ hashCode <- function(key) {
     intBits <- packBits(rawToBits(rawVec), "integer")
     as.integer(bitwXor(intBits[2], intBits[1]))
   } else if (class(key) == "character") {
-    .Call("stringHashCode", key)
+    # TODO: SPARK-7839 means we might not have the native library available
+    if (is.loaded("stringHashCode")) {
+      .Call("stringHashCode", key)
+    } else {
+      n <- nchar(key)
+      if (n == 0) {
+        0L
+      } else {
+        asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) })
+        hashC <- 0
+        for (k in 1:length(asciiVals)) {
+          hashC <- mult31AndAdd(hashC, asciiVals[k])
+        }
+        as.integer(hashC)
+      }
+    }
   } else {
     warning(paste("Could not hash object, returning 0", sep = ""))
     as.integer(0)
   }
 }
 
+# Helper function used to wrap a 'numeric' value to integer bounds.
+# Useful for implementing C-like integer arithmetic
+wrapInt <- function(value) {
+  if (value > .Machine$integer.max) {
+    value <- value - 2 * .Machine$integer.max - 2
+  } else if (value < -1 * .Machine$integer.max) {
+    value <- 2 * .Machine$integer.max + value + 2
+  }
+  value
+}
+
+# Multiply `val` by 31 and add `addVal` to the result. Ensures that
+# integer-overflows are handled at every step.
+mult31AndAdd <- function(val, addVal) {
+  vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal)
+  Reduce(function(a, b) {
+          wrapInt(as.numeric(a) + as.numeric(b))
+         },
+         vec)
+}
+
 # Create a new RDD with serializedMode == "byte".
 # Return itself if already in "byte" format.
 serializeToBytes <- function(rdd) {

http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/Makefile
----------------------------------------------------------------------
diff --git a/R/pkg/src-native/Makefile b/R/pkg/src-native/Makefile
new file mode 100644
index 0000000..a55a56f
--- /dev/null
+++ b/R/pkg/src-native/Makefile
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+all: sharelib
+
+sharelib: string_hash_code.c
+	R CMD SHLIB -o SparkR.so string_hash_code.c
+
+clean:
+	rm -f *.o
+	rm -f *.so
+       
+.PHONY: all clean

http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/Makefile.win
----------------------------------------------------------------------
diff --git a/R/pkg/src-native/Makefile.win b/R/pkg/src-native/Makefile.win
new file mode 100644
index 0000000..aa486d8
--- /dev/null
+++ b/R/pkg/src-native/Makefile.win
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+all: sharelib
+
+sharelib: string_hash_code.c
+	R CMD SHLIB -o SparkR.dll string_hash_code.c
+
+clean:
+	rm -f *.o
+	rm -f *.dll
+       
+.PHONY: all clean

http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/string_hash_code.c
----------------------------------------------------------------------
diff --git a/R/pkg/src-native/string_hash_code.c b/R/pkg/src-native/string_hash_code.c
new file mode 100644
index 0000000..e3274b9
--- /dev/null
+++ b/R/pkg/src-native/string_hash_code.c
@@ -0,0 +1,49 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+ * A C function for R extension which implements the Java String hash algorithm.
+ * Refer to http://en.wikipedia.org/wiki/Java_hashCode%28%29#The_java.lang.String_hash_function
+ *
+ */
+
+#include <R.h>
+#include <Rinternals.h>
+
+/* for compatibility with R before 3.1 */
+#ifndef IS_SCALAR
+#define IS_SCALAR(x, type) (TYPEOF(x) == (type) && XLENGTH(x) == 1)
+#endif
+
+SEXP stringHashCode(SEXP string) {
+  const char* str;
+  R_xlen_t len, i;
+  int hashCode = 0;
+  
+  if (!IS_SCALAR(string, STRSXP)) {
+    error("invalid input");
+  }
+  
+  str = CHAR(asChar(string));
+  len = XLENGTH(asChar(string));
+  
+  for (i = 0; i < len; i++) {
+    hashCode = (hashCode << 5) - hashCode + *str++;
+  }
+
+  return ScalarInteger(hashCode);
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src/Makefile
----------------------------------------------------------------------
diff --git a/R/pkg/src/Makefile b/R/pkg/src/Makefile
deleted file mode 100644
index a55a56f..0000000
--- a/R/pkg/src/Makefile
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-all: sharelib
-
-sharelib: string_hash_code.c
-	R CMD SHLIB -o SparkR.so string_hash_code.c
-
-clean:
-	rm -f *.o
-	rm -f *.so
-       
-.PHONY: all clean

http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src/Makefile.win
----------------------------------------------------------------------
diff --git a/R/pkg/src/Makefile.win b/R/pkg/src/Makefile.win
deleted file mode 100644
index aa486d8..0000000
--- a/R/pkg/src/Makefile.win
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-all: sharelib
-
-sharelib: string_hash_code.c
-	R CMD SHLIB -o SparkR.dll string_hash_code.c
-
-clean:
-	rm -f *.o
-	rm -f *.dll
-       
-.PHONY: all clean

http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src/string_hash_code.c
----------------------------------------------------------------------
diff --git a/R/pkg/src/string_hash_code.c b/R/pkg/src/string_hash_code.c
deleted file mode 100644
index e3274b9..0000000
--- a/R/pkg/src/string_hash_code.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-*/
-
-/*
- * A C function for R extension which implements the Java String hash algorithm.
- * Refer to http://en.wikipedia.org/wiki/Java_hashCode%28%29#The_java.lang.String_hash_function
- *
- */
-
-#include <R.h>
-#include <Rinternals.h>
-
-/* for compatibility with R before 3.1 */
-#ifndef IS_SCALAR
-#define IS_SCALAR(x, type) (TYPEOF(x) == (type) && XLENGTH(x) == 1)
-#endif
-
-SEXP stringHashCode(SEXP string) {
-  const char* str;
-  R_xlen_t len, i;
-  int hashCode = 0;
-  
-  if (!IS_SCALAR(string, STRSXP)) {
-    error("invalid input");
-  }
-  
-  str = CHAR(asChar(string));
-  len = XLENGTH(asChar(string));
-  
-  for (i = 0; i < len; i++) {
-    hashCode = (hashCode << 5) - hashCode + *str++;
-  }
-
-  return ScalarInteger(hashCode);
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/make-distribution.sh
----------------------------------------------------------------------
diff --git a/make-distribution.sh b/make-distribution.sh
index 8d6e91d..7882734 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -229,6 +229,8 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf
 cp "$SPARK_HOME/README.md" "$DISTDIR"
 cp -r "$SPARK_HOME/bin" "$DISTDIR"
 cp -r "$SPARK_HOME/python" "$DISTDIR"
+mkdir -p "$DISTDIR"/R/lib
+cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib
 cp -r "$SPARK_HOME/sbin" "$DISTDIR"
 cp -r "$SPARK_HOME/ec2" "$DISTDIR"
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org