You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@amaterasu.apache.org by ya...@apache.org on 2018/05/15 02:17:36 UTC
[incubator-amaterasu] branch version-0.2.0-incubating-rc3 updated:
fixed missing licenses, Apache Rat runs successfully
This is an automated email from the ASF dual-hosted git repository.
yaniv pushed a commit to branch version-0.2.0-incubating-rc3
in repository https://gitbox.apache.org/repos/asf/incubator-amaterasu.git
The following commit(s) were added to refs/heads/version-0.2.0-incubating-rc3 by this push:
new 7cb88c8 fixed missing licenses, Apache Rat runs successfully
7cb88c8 is described below
commit 7cb88c891b9444e3de3ad28fb51d6e716b0713d2
Author: Yaniv Rodenski <ro...@gmail.com>
AuthorDate: Tue May 15 12:17:13 2018 +1000
fixed missing licenses, Apache Rat runs successfully
---
.gitignore | 14 +++++
build.gradle | 2 +-
executor/src/main/resources/runtime.py | 16 +++++
executor/src/main/resources/spark_intp.py | 20 ++++--
.../src/test/resources/pyspark-with-amacontext.py | 37 +++++++++++
executor/src/test/resources/runtime.py | 34 ++++++++++-
executor/src/test/resources/simple-pyspark.py | 16 +++++
executor/src/test/resources/simple-python-err.py | 16 +++++
executor/src/test/resources/simple-python.py | 16 +++++
executor/src/test/resources/simple-spark.scala | 16 +++++
executor/src/test/resources/spark_intp.py | 71 ++++++++++++++++------
executor/src/test/resources/step-2.scala | 16 +++++
leader/src/main/scripts/amaterasu.properties | 14 +++++
leader/src/main/scripts/log4j.properties | 15 +++++
14 files changed, 276 insertions(+), 27 deletions(-)
diff --git a/.gitignore b/.gitignore
index a64ebd0..98981a1 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
### Scala ###
*.class
*.log
diff --git a/build.gradle b/build.gradle
index 159a975..79b926f 100644
--- a/build.gradle
+++ b/build.gradle
@@ -57,7 +57,7 @@ task buildDistribution() {
rat {
// List of exclude directives, defaults to ['**/.gradle/**']
- excludes = ["**/build/**", '**/.gradle/**', '**/gradle/**', '**/.idea/**', '**/.ruby-version/**', '**/repo/**', '**/resources/**']
+ excludes = ["**/build/**", '**/.gradle/**', '**/gradle/**', '**/.idea/**', '**/.ruby-version/**', '**/repo/**', '**/resources/**', '**/*.iml/**']
}
tasks.withType(Test) {
diff --git a/executor/src/main/resources/runtime.py b/executor/src/main/resources/runtime.py
index 874b174..d01664c 100644
--- a/executor/src/main/resources/runtime.py
+++ b/executor/src/main/resources/runtime.py
@@ -1,3 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
class AmaContext(object):
def __init__(self, sc, spark, job_id, env):
diff --git a/executor/src/main/resources/spark_intp.py b/executor/src/main/resources/spark_intp.py
index f1752a2..0faae2b 100755
--- a/executor/src/main/resources/spark_intp.py
+++ b/executor/src/main/resources/spark_intp.py
@@ -1,10 +1,20 @@
#!/usr/bin/python
-
-# import os
-# user_paths = os.environ['PYTHONPATH']
#
-# with open('/Users/roadan/pypath.txt', 'a') as the_file:
-# the_file.write(user_paths)
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
import ast
import codegen
diff --git a/executor/src/test/resources/pyspark-with-amacontext.py b/executor/src/test/resources/pyspark-with-amacontext.py
index bd780a9..c940eea 100755
--- a/executor/src/test/resources/pyspark-with-amacontext.py
+++ b/executor/src/test/resources/pyspark-with-amacontext.py
@@ -1,3 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+class AmaContext(object):
+
+ def __init__(self, sc, spark, job_id, env):
+ self.sc = sc
+ self.spark = spark
+ self.job_id = job_id
+ self.env = env
+
+ def get_dataframe(self, action_name, dataset_name, format = "parquet"):
+ return self.spark.read.format(format).load(str(self.env.working_dir) + "/" + self.job_id + "/" + action_name + "/" + dataset_name)
+
+class Environment(object):
+
+ def __init__(self, name, master, input_root_path, output_root_path, working_dir, configuration):
+ self.name = name
+ self.master = master
+ self.input_root_path = input_root_path
+ self.output_root_path = output_root_path
+ self.working_dir = working_dir
+ self.configuration = configuration
+
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
rdd = ama_context.sc.parallelize(data)
odd = rdd.filter(lambda num: num % 2 != 0)
\ No newline at end of file
diff --git a/executor/src/test/resources/runtime.py b/executor/src/test/resources/runtime.py
index 3a90952..d01664c 100644
--- a/executor/src/test/resources/runtime.py
+++ b/executor/src/test/resources/runtime.py
@@ -1,6 +1,36 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
class AmaContext(object):
- def __init__(self, sc, sqlContext):
+ def __init__(self, sc, spark, job_id, env):
self.sc = sc
- self.sqlContext = sqlContext
+ self.spark = spark
+ self.job_id = job_id
+ self.env = env
+ def get_dataframe(self, action_name, dataset_name, format = "parquet"):
+ return self.spark.read.format(format).load(str(self.env.working_dir) + "/" + self.job_id + "/" + action_name + "/" + dataset_name)
+
+class Environment(object):
+
+ def __init__(self, name, master, input_root_path, output_root_path, working_dir, configuration):
+ self.name = name
+ self.master = master
+ self.input_root_path = input_root_path
+ self.output_root_path = output_root_path
+ self.working_dir = working_dir
+ self.configuration = configuration
diff --git a/executor/src/test/resources/simple-pyspark.py b/executor/src/test/resources/simple-pyspark.py
index df9eb0a..923f81c 100755
--- a/executor/src/test/resources/simple-pyspark.py
+++ b/executor/src/test/resources/simple-pyspark.py
@@ -1,3 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
try:
rdd = sc.parallelize(data)
diff --git a/executor/src/test/resources/simple-python-err.py b/executor/src/test/resources/simple-python-err.py
index 14ca311..dff1491 100755
--- a/executor/src/test/resources/simple-python-err.py
+++ b/executor/src/test/resources/simple-python-err.py
@@ -1,3 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
data = [1, 2, 3, 4, 5]
1/0
diff --git a/executor/src/test/resources/simple-python.py b/executor/src/test/resources/simple-python.py
index 3f25951..0ac6f85 100755
--- a/executor/src/test/resources/simple-python.py
+++ b/executor/src/test/resources/simple-python.py
@@ -1,3 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
data = [1, 2, 3, 4, 5]
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print(data)
diff --git a/executor/src/test/resources/simple-spark.scala b/executor/src/test/resources/simple-spark.scala
index 797235d..802547c 100755
--- a/executor/src/test/resources/simple-spark.scala
+++ b/executor/src/test/resources/simple-spark.scala
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
import org.apache.amaterasu.executor.runtime.AmaContext
import org.apache.spark.sql.{DataFrame, SaveMode}
diff --git a/executor/src/test/resources/spark_intp.py b/executor/src/test/resources/spark_intp.py
index 6b8eaf6..0faae2b 100755
--- a/executor/src/test/resources/spark_intp.py
+++ b/executor/src/test/resources/spark_intp.py
@@ -1,24 +1,40 @@
#!/usr/bin/python
-
-# import os
-# user_paths = os.environ['PYTHONPATH']
#
-# with open('/Users/roadan/pypath.txt', 'a') as the_file:
-# the_file.write(user_paths)
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
import ast
import codegen
import os
import sys
-from runtime import AmaContext
-os.chdir(os.getcwd() + '/build/resources/test/')
-import zipfile
-zip = zipfile.ZipFile('pyspark.zip')
-zip.extractall()
-zip = zipfile.ZipFile('py4j-0.10.4-src.zip', 'r')
-zip.extractall()
+import zipimport
+from runtime import AmaContext, Environment
+
+# os.chdir(os.getcwd() + '/build/resources/test/')
+# import zipfile
+# zip = zipfile.ZipFile('pyspark.zip')
+# zip.extractall()
+# zip = zipfile.ZipFile('py4j-0.10.4-src.zip', 'r')
+# zip.extractall()
# sys.path.insert(1, os.getcwd() + '/executor/src/test/resources/pyspark')
# sys.path.insert(1, os.getcwd() + '/executor/src/test/resources/py4j')
+
+# py4j_path = 'spark-2.2.1-bin-hadoop2.7/python/lib/py4j-0.10.4-src.zip'
+# py4j_importer = zipimport.zipimporter(py4j_path)
+# py4j = py4j_importer.load_module('py4j')
from py4j.java_gateway import JavaGateway, GatewayClient, java_import
from py4j.protocol import Py4JJavaError
from pyspark.conf import SparkConf
@@ -31,9 +47,10 @@ from pyspark.accumulators import Accumulator, AccumulatorParam
from pyspark.broadcast import Broadcast
from pyspark.serializers import MarshalSerializer, PickleSerializer
from pyspark.sql import SparkSession
+from pyspark.sql import Row
client = GatewayClient(port=int(sys.argv[1]))
-gateway = JavaGateway(client, auto_convert = True)
+gateway = JavaGateway(client, auto_convert=True)
entry_point = gateway.entry_point
queue = entry_point.getExecutionQueue()
@@ -49,28 +66,44 @@ java_import(gateway.jvm, "scala.Tuple2")
jconf = entry_point.getSparkConf()
jsc = entry_point.getJavaSparkContext()
-conf = SparkConf(_jvm = gateway.jvm, _jconf = jconf)
+job_id = entry_point.getJobId()
+javaEnv = entry_point.getEnv()
+env = Environment(javaEnv.name(), javaEnv.master(), javaEnv.inputRootPath(), javaEnv.outputRootPath(), javaEnv.workingDir(), javaEnv.configuration())
+conf = SparkConf(_jvm=gateway.jvm, _jconf=jconf)
+conf.setExecutorEnv('PYTHONPATH', ':'.join(sys.path))
sc = SparkContext(jsc=jsc, gateway=gateway, conf=conf)
-
spark = SparkSession(sc, entry_point.getSparkSession())
-sqlc = spark._wrapped
-ama_context = AmaContext(sc, sqlc)
+ama_context = AmaContext(sc, spark, job_id, env)
while True:
actionData = queue.getNext()
resultQueue = entry_point.getResultQueue(actionData._2())
actionSource = actionData._1()
tree = ast.parse(actionSource)
+ exports = actionData._3()
for node in tree.body:
wrapper = ast.Module(body=[node])
try:
- co = compile(wrapper, "<ast>", 'exec')
- exec(co)
+ co = compile(wrapper, "<ast>", 'exec')
+ exec (co)
resultQueue.put('success', actionData._2(), codegen.to_source(node), '')
+
+ #if this node is an assignment, we need to check if it needs to be persisted
+ try:
+ persistCode = ''
+ if(isinstance(node,ast.Assign)):
+ varName = node.targets[0].id
+ if(exports.containsKey(varName)):
+ persistCode = varName + ".write.save(\"" + env.working_dir + "/" + job_id + "/" + actionData._2() + "/" + varName + "\", format=\"" + exports[varName] + "\", mode='overwrite')"
+ persist = compile(persistCode, '<stdin>', 'exec')
+ exec(persist)
+
+ except:
+ resultQueue.put('error', actionData._2(), persistCode, str(sys.exc_info()[1]))
except:
resultQueue.put('error', actionData._2(), codegen.to_source(node), str(sys.exc_info()[1]))
resultQueue.put('completion', '', '', '')
\ No newline at end of file
diff --git a/executor/src/test/resources/step-2.scala b/executor/src/test/resources/step-2.scala
index 34ad839..4b0dfca 100755
--- a/executor/src/test/resources/step-2.scala
+++ b/executor/src/test/resources/step-2.scala
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
import org.apache.amaterasu.executor.runtime.AmaContext
val oddRdd = AmaContext.getRDD[Int]("start", "rdd").filter(x=>x/2 == 0)
diff --git a/leader/src/main/scripts/amaterasu.properties b/leader/src/main/scripts/amaterasu.properties
index 572a642..5cd6638 100755
--- a/leader/src/main/scripts/amaterasu.properties
+++ b/leader/src/main/scripts/amaterasu.properties
@@ -1,3 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
zk=127.0.0.1
version=0.2.0-incubating-rc3
master=192.168.33.11
diff --git a/leader/src/main/scripts/log4j.properties b/leader/src/main/scripts/log4j.properties
index c5e965f..742eb59 100644
--- a/leader/src/main/scripts/log4j.properties
+++ b/leader/src/main/scripts/log4j.properties
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# Root logger option
log4j.rootLogger=DEBUG, stdout, file
--
To stop receiving notification emails like this one, please contact
yaniv@apache.org.