You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gora.apache.org by le...@apache.org on 2015/09/03 09:28:48 UTC

[12/25] gora git commit: GoraSparkEngine.java setOutput method renamed to generateOutputConf. Added a new method to GoraSparkEngine.java which creates a job and returns necessary conf for Spark to use.

Renamed the setOutput methods in GoraSparkEngine.java to generateOutputConf.
Added a new generateOutputConf overload to GoraSparkEngine.java that creates the job itself and returns the configuration Spark needs for output.


Project: http://git-wip-us.apache.org/repos/asf/gora/repo
Commit: http://git-wip-us.apache.org/repos/asf/gora/commit/85849111
Tree: http://git-wip-us.apache.org/repos/asf/gora/tree/85849111
Diff: http://git-wip-us.apache.org/repos/asf/gora/diff/85849111

Branch: refs/heads/master
Commit: 8584911176712f63e316707ace23a12060588cdc
Parents: 62be0c3
Author: Furkan KAMACI <fu...@gmail.com>
Authored: Mon Aug 17 23:25:15 2015 +0300
Committer: Furkan KAMACI <fu...@gmail.com>
Committed: Mon Aug 17 23:25:15 2015 +0300

----------------------------------------------------------------------
 .../org/apache/gora/spark/GoraSparkEngine.java  | 28 +++++++++++++++-----
 .../gora/tutorial/log/LogAnalyticsSpark.java    |  5 +---
 2 files changed, 23 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/gora/blob/85849111/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java
----------------------------------------------------------------------
diff --git a/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java b/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java
index 9c64542..7a819fd 100644
--- a/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java
+++ b/gora-core/src/main/java/org/apache/gora/spark/GoraSparkEngine.java
@@ -97,19 +97,35 @@ public class GoraSparkEngine<K, V extends Persistent> {
   }
 
     /**
-     * Sets the output parameters for the job
+     * Creates a job and sets the output parameters for the conf that Spark will use
+     * @param dataStore the datastore as the output
+     * @param reuseObjects whether to reuse objects in serialization
+     */
+    public <K, V extends Persistent> Configuration generateOutputConf(DataStore<K, V> dataStore,
+        boolean reuseObjects) throws IOException {
+
+      Configuration hadoopConf = new Configuration();
+      GoraMapReduceUtils.setIOSerializations(hadoopConf, true);
+      Job job = Job.getInstance(hadoopConf);
+
+      return generateOutputConf(job, dataStore.getClass(), dataStore.getKeyClass(),
+           dataStore.getPersistentClass(), reuseObjects);
+    }
+
+    /**
+     * Sets the output parameters for the conf that Spark will use
      * @param job the job to set the properties for
      * @param dataStore the datastore as the output
      * @param reuseObjects whether to reuse objects in serialization
      */
-    public <K, V extends Persistent> Configuration setOutput(Job job,
+    public <K, V extends Persistent> Configuration generateOutputConf(Job job,
         DataStore<K, V> dataStore, boolean reuseObjects) {
-      return setOutput(job, dataStore.getClass(), dataStore.getKeyClass(),
-           dataStore.getPersistentClass(), reuseObjects);
+      return generateOutputConf(job, dataStore.getClass(), dataStore.getKeyClass(),
+              dataStore.getPersistentClass(), reuseObjects);
     }
 
     /**
-     * Sets the output parameters for the job
+     * Sets the output parameters for the conf that Spark will use
      *
      * @param job             the job to set the properties for
      * @param dataStoreClass  the datastore class
@@ -118,7 +134,7 @@ public class GoraSparkEngine<K, V extends Persistent> {
      * @param reuseObjects    whether to reuse objects in serialization
      */
     @SuppressWarnings("rawtypes")
-    public <K, V extends Persistent> Configuration setOutput(Job job,
+    public <K, V extends Persistent> Configuration generateOutputConf(Job job,
         Class<? extends DataStore> dataStoreClass,
         Class<K> keyClass, Class<V> persistentClass,
         boolean reuseObjects) {

http://git-wip-us.apache.org/repos/asf/gora/blob/85849111/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java
----------------------------------------------------------------------
diff --git a/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java b/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java
index d441de7..c66f7e4 100644
--- a/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java
+++ b/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java
@@ -190,10 +190,7 @@ public class LogAnalyticsSpark {
     //
 
     //write output to datastore
-    GoraMapReduceUtils.setIOSerializations(hadoopConf, true);
-    Job job = Job.getInstance(hadoopConf);
-
-    Configuration sparkHadoopConf = goraSparkEngine.setOutput(job, outStore, true);
+    Configuration sparkHadoopConf = goraSparkEngine.generateOutputConf(outStore, true);
     reducedGoraRdd.saveAsNewAPIHadoopDataset(sparkHadoopConf);
     //