You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by lc...@apache.org on 2016/04/07 20:21:15 UTC

[05/12] incubator-beam git commit: [BEAM-151] Remove dependence on num workers in DatastoreIO

[BEAM-151] Remove dependence on num workers in DatastoreIO

This dependence prevents moving DataflowPipelineOptions into a separate Maven module.


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/551d0814
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/551d0814
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/551d0814

Branch: refs/heads/master
Commit: 551d081481450e750a9a95b5902a540d0662db85
Parents: d0db477
Author: Luke Cwik <lc...@google.com>
Authored: Mon Mar 28 13:11:28 2016 -0700
Committer: Luke Cwik <lc...@google.com>
Committed: Thu Apr 7 11:19:49 2016 -0700

----------------------------------------------------------------------
 .../com/google/cloud/dataflow/sdk/io/DatastoreIO.java | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/551d0814/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java
index 9128585..8d85ab3 100644
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java
+++ b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java
@@ -54,7 +54,6 @@ import com.google.cloud.dataflow.sdk.coders.EntityCoder;
 import com.google.cloud.dataflow.sdk.coders.SerializableCoder;
 import com.google.cloud.dataflow.sdk.io.Sink.WriteOperation;
 import com.google.cloud.dataflow.sdk.io.Sink.Writer;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
 import com.google.cloud.dataflow.sdk.options.GcpOptions;
 import com.google.cloud.dataflow.sdk.options.PipelineOptions;
 import com.google.cloud.dataflow.sdk.util.AttemptBoundedExponentialBackOff;
@@ -307,18 +306,7 @@ public class DatastoreIO {
         numSplits = Math.round(((double) getEstimatedSizeBytes(options)) / desiredBundleSizeBytes);
       } catch (Exception e) {
         // Fallback in case estimated size is unavailable. TODO: fix this, it's horrible.
-
-        // 1. Try Dataflow's numWorkers, which will be 0 for other workers.
-        DataflowPipelineWorkerPoolOptions poolOptions =
-            options.as(DataflowPipelineWorkerPoolOptions.class);
-        if (poolOptions.getNumWorkers() > 0) {
-          LOG.warn("Estimated size of unavailable, using the number of workers {}",
-              poolOptions.getNumWorkers(), e);
-          numSplits = poolOptions.getNumWorkers();
-        } else {
-          // 2. Default to 12 in the unknown case.
-          numSplits = 12;
-        }
+        numSplits = 12;
       }
 
       // If the desiredBundleSize or number of workers results in 1 split, simply return