You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sh...@apache.org on 2015/05/23 09:04:26 UTC
spark git commit: [SPARK-6811] Copy SparkR lib in make-distribution.sh
Repository: spark
Updated Branches:
refs/heads/master 7af3818c6 -> a40bca011
[SPARK-6811] Copy SparkR lib in make-distribution.sh
This change also removes native libraries from SparkR to make sure our distribution works across platforms.
Tested by building on Mac and running on Amazon Linux (CentOS) and a Windows VM, and vice versa (built on Linux, run on Mac).
I will also test this with YARN soon and update this PR.
Author: Shivaram Venkataraman <sh...@cs.berkeley.edu>
Closes #6373 from shivaram/sparkr-binary and squashes the following commits:
ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR Also include the built SparkR package in make-distribution.sh
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a40bca01
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a40bca01
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a40bca01
Branch: refs/heads/master
Commit: a40bca0111de45763c3ef4270afb2185c16b8f95
Parents: 7af3818
Author: Shivaram Venkataraman <sh...@cs.berkeley.edu>
Authored: Sat May 23 00:04:01 2015 -0700
Committer: Shivaram Venkataraman <sh...@cs.berkeley.edu>
Committed: Sat May 23 00:04:01 2015 -0700
----------------------------------------------------------------------
R/pkg/NAMESPACE | 5 +++-
R/pkg/R/utils.R | 38 ++++++++++++++++++++++++-
R/pkg/src-native/Makefile | 27 ++++++++++++++++++
R/pkg/src-native/Makefile.win | 27 ++++++++++++++++++
R/pkg/src-native/string_hash_code.c | 49 ++++++++++++++++++++++++++++++++
R/pkg/src/Makefile | 27 ------------------
R/pkg/src/Makefile.win | 27 ------------------
R/pkg/src/string_hash_code.c | 49 --------------------------------
make-distribution.sh | 2 ++
9 files changed, 146 insertions(+), 105 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 64ffdcf..411126a 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -1,6 +1,9 @@
# Imports from base R
importFrom(methods, setGeneric, setMethod, setOldClass)
-useDynLib(SparkR, stringHashCode)
+
+# Disable native libraries till we figure out how to package it
+# See SPARKR-7839
+#useDynLib(SparkR, stringHashCode)
# S3 methods exported
export("sparkR.init")
http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/R/utils.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 0e7b7bd..69b2700 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -122,13 +122,49 @@ hashCode <- function(key) {
intBits <- packBits(rawToBits(rawVec), "integer")
as.integer(bitwXor(intBits[2], intBits[1]))
} else if (class(key) == "character") {
- .Call("stringHashCode", key)
+ # TODO: SPARK-7839 means we might not have the native library available
+ if (is.loaded("stringHashCode")) {
+ .Call("stringHashCode", key)
+ } else {
+ n <- nchar(key)
+ if (n == 0) {
+ 0L
+ } else {
+ asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) })
+ hashC <- 0
+ for (k in 1:length(asciiVals)) {
+ hashC <- mult31AndAdd(hashC, asciiVals[k])
+ }
+ as.integer(hashC)
+ }
+ }
} else {
warning(paste("Could not hash object, returning 0", sep = ""))
as.integer(0)
}
}
+# Helper function used to wrap a 'numeric' value to integer bounds.
+# Useful for implementing C-like integer arithmetic
+wrapInt <- function(value) {
+ if (value > .Machine$integer.max) {
+ value <- value - 2 * .Machine$integer.max - 2
+ } else if (value < -1 * .Machine$integer.max) {
+ value <- 2 * .Machine$integer.max + value + 2
+ }
+ value
+}
+
+# Multiply `val` by 31 and add `addVal` to the result. Ensures that
+# integer-overflows are handled at every step.
+mult31AndAdd <- function(val, addVal) {
+ vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal)
+ Reduce(function(a, b) {
+ wrapInt(as.numeric(a) + as.numeric(b))
+ },
+ vec)
+}
+
# Create a new RDD with serializedMode == "byte".
# Return itself if already in "byte" format.
serializeToBytes <- function(rdd) {
http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/Makefile
----------------------------------------------------------------------
diff --git a/R/pkg/src-native/Makefile b/R/pkg/src-native/Makefile
new file mode 100644
index 0000000..a55a56f
--- /dev/null
+++ b/R/pkg/src-native/Makefile
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+all: sharelib
+
+sharelib: string_hash_code.c
+ R CMD SHLIB -o SparkR.so string_hash_code.c
+
+clean:
+ rm -f *.o
+ rm -f *.so
+
+.PHONY: all clean
http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/Makefile.win
----------------------------------------------------------------------
diff --git a/R/pkg/src-native/Makefile.win b/R/pkg/src-native/Makefile.win
new file mode 100644
index 0000000..aa486d8
--- /dev/null
+++ b/R/pkg/src-native/Makefile.win
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+all: sharelib
+
+sharelib: string_hash_code.c
+ R CMD SHLIB -o SparkR.dll string_hash_code.c
+
+clean:
+ rm -f *.o
+ rm -f *.dll
+
+.PHONY: all clean
http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/string_hash_code.c
----------------------------------------------------------------------
diff --git a/R/pkg/src-native/string_hash_code.c b/R/pkg/src-native/string_hash_code.c
new file mode 100644
index 0000000..e3274b9
--- /dev/null
+++ b/R/pkg/src-native/string_hash_code.c
@@ -0,0 +1,49 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+ * A C function for R extension which implements the Java String hash algorithm.
+ * Refer to http://en.wikipedia.org/wiki/Java_hashCode%28%29#The_java.lang.String_hash_function
+ *
+ */
+
+#include <R.h>
+#include <Rinternals.h>
+
+/* for compatibility with R before 3.1 */
+#ifndef IS_SCALAR
+#define IS_SCALAR(x, type) (TYPEOF(x) == (type) && XLENGTH(x) == 1)
+#endif
+
+SEXP stringHashCode(SEXP string) {
+ const char* str;
+ R_xlen_t len, i;
+ int hashCode = 0;
+
+ if (!IS_SCALAR(string, STRSXP)) {
+ error("invalid input");
+ }
+
+ str = CHAR(asChar(string));
+ len = XLENGTH(asChar(string));
+
+ for (i = 0; i < len; i++) {
+ hashCode = (hashCode << 5) - hashCode + *str++;
+ }
+
+ return ScalarInteger(hashCode);
+}
http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src/Makefile
----------------------------------------------------------------------
diff --git a/R/pkg/src/Makefile b/R/pkg/src/Makefile
deleted file mode 100644
index a55a56f..0000000
--- a/R/pkg/src/Makefile
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-all: sharelib
-
-sharelib: string_hash_code.c
- R CMD SHLIB -o SparkR.so string_hash_code.c
-
-clean:
- rm -f *.o
- rm -f *.so
-
-.PHONY: all clean
http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src/Makefile.win
----------------------------------------------------------------------
diff --git a/R/pkg/src/Makefile.win b/R/pkg/src/Makefile.win
deleted file mode 100644
index aa486d8..0000000
--- a/R/pkg/src/Makefile.win
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-all: sharelib
-
-sharelib: string_hash_code.c
- R CMD SHLIB -o SparkR.dll string_hash_code.c
-
-clean:
- rm -f *.o
- rm -f *.dll
-
-.PHONY: all clean
http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src/string_hash_code.c
----------------------------------------------------------------------
diff --git a/R/pkg/src/string_hash_code.c b/R/pkg/src/string_hash_code.c
deleted file mode 100644
index e3274b9..0000000
--- a/R/pkg/src/string_hash_code.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-*/
-
-/*
- * A C function for R extension which implements the Java String hash algorithm.
- * Refer to http://en.wikipedia.org/wiki/Java_hashCode%28%29#The_java.lang.String_hash_function
- *
- */
-
-#include <R.h>
-#include <Rinternals.h>
-
-/* for compatibility with R before 3.1 */
-#ifndef IS_SCALAR
-#define IS_SCALAR(x, type) (TYPEOF(x) == (type) && XLENGTH(x) == 1)
-#endif
-
-SEXP stringHashCode(SEXP string) {
- const char* str;
- R_xlen_t len, i;
- int hashCode = 0;
-
- if (!IS_SCALAR(string, STRSXP)) {
- error("invalid input");
- }
-
- str = CHAR(asChar(string));
- len = XLENGTH(asChar(string));
-
- for (i = 0; i < len; i++) {
- hashCode = (hashCode << 5) - hashCode + *str++;
- }
-
- return ScalarInteger(hashCode);
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/make-distribution.sh
----------------------------------------------------------------------
diff --git a/make-distribution.sh b/make-distribution.sh
index 8d6e91d..7882734 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -229,6 +229,8 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf
cp "$SPARK_HOME/README.md" "$DISTDIR"
cp -r "$SPARK_HOME/bin" "$DISTDIR"
cp -r "$SPARK_HOME/python" "$DISTDIR"
+mkdir -p "$DISTDIR"/R/lib
+cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib
cp -r "$SPARK_HOME/sbin" "$DISTDIR"
cp -r "$SPARK_HOME/ec2" "$DISTDIR"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org