You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by lc...@apache.org on 2017/12/07 23:17:25 UTC
[beam] 02/02: [BEAM-2957] Change assert in testSplit to allow up to 50% of empty splits
This is an automated email from the ASF dual-hosted git repository.
lcwik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
commit 16a506b582d7a583bf771ce3fe1f260572d6ebc9
Author: Etienne Chauchot <ec...@apache.org>
AuthorDate: Wed Dec 6 09:56:40 2017 +0100
[BEAM-2957] Change assert in testSplit to allow up to 50% of empty splits
---
.../beam/sdk/io/elasticsearch/ElasticsearchIOTest.java | 18 ++++++++++++------
.../beam/sdk/io/elasticsearch/ElasticsearchIOTest.java | 15 ++++++++++-----
.../io/elasticsearch/ElasticsearchIOTestCommon.java | 1 +
3 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
index 06298cd..c1e7662 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-2/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
@@ -20,11 +20,14 @@ package org.apache.beam.sdk.io.elasticsearch;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BoundedElasticsearchSource;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Read;
+import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ACCEPTABLE_EMPTY_SPLITS_PERCENTAGE;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ES_INDEX;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ES_TYPE;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.NUM_DOCS_UTESTS;
import static org.apache.beam.sdk.testing.SourceTestUtils.readFromSource;
+import static org.hamcrest.Matchers.lessThan;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
import java.io.IOException;
import java.io.Serializable;
@@ -172,14 +175,17 @@ public class ElasticsearchIOTest implements Serializable {
SourceTestUtils.assertSourcesEqualReferenceSource(initialSource, splits, options);
//this is the number of ES shards
// (By default, each index in Elasticsearch is allocated 5 primary shards)
- int expectedNumSplits = 5;
- assertEquals(expectedNumSplits, splits.size());
- int nonEmptySplits = 0;
+ int expectedNumSources = 5;
+ assertEquals("Wrong number of splits", expectedNumSources, splits.size());
+ int emptySplits = 0;
for (BoundedSource<String> subSource : splits) {
- if (readFromSource(subSource, options).size() > 0) {
- nonEmptySplits += 1;
+ if (readFromSource(subSource, options).isEmpty()) {
+ emptySplits += 1;
}
}
- assertEquals("Wrong number of non empty splits", expectedNumSplits, nonEmptySplits);
+ assertThat(
+ "There are too many empty splits, parallelism is sub-optimal",
+ emptySplits,
+ lessThan((int) (ACCEPTABLE_EMPTY_SPLITS_PERCENTAGE * splits.size())));
}
}
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
index c1d844b..ec81074 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTest.java
@@ -20,10 +20,12 @@ package org.apache.beam.sdk.io.elasticsearch;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BoundedElasticsearchSource;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Read;
+import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ACCEPTABLE_EMPTY_SPLITS_PERCENTAGE;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ES_INDEX;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.ES_TYPE;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestCommon.NUM_DOCS_UTESTS;
import static org.apache.beam.sdk.testing.SourceTestUtils.readFromSource;
+import static org.hamcrest.Matchers.lessThan;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import java.io.Serializable;
@@ -173,13 +175,16 @@ public class ElasticsearchIOTest extends ESIntegTestCase implements Serializable
long indexSize = BoundedElasticsearchSource.estimateIndexSize(connectionConfiguration);
float expectedNumSourcesFloat = (float) indexSize / desiredBundleSizeBytes;
int expectedNumSources = (int) Math.ceil(expectedNumSourcesFloat);
- assertEquals(expectedNumSources, splits.size());
- int nonEmptySplits = 0;
+ assertEquals("Wrong number of splits", expectedNumSources, splits.size());
+ int emptySplits = 0;
for (BoundedSource<String> subSource : splits) {
- if (readFromSource(subSource, options).size() > 0) {
- nonEmptySplits += 1;
+ if (readFromSource(subSource, options).isEmpty()) {
+ emptySplits += 1;
}
}
- assertEquals("Wrong number of non empty splits", expectedNumSources, nonEmptySplits);
+ assertThat(
+ "There are too many empty splits, parallelism is sub-optimal",
+ emptySplits,
+ lessThan((int) (ACCEPTABLE_EMPTY_SPLITS_PERCENTAGE * splits.size())));
}
}
diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestCommon.java b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestCommon.java
index 03eaf00..55d1fe7 100644
--- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestCommon.java
+++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOTestCommon.java
@@ -59,6 +59,7 @@ class ElasticsearchIOTestCommon implements Serializable {
static final String ES_TYPE = "test";
static final long NUM_DOCS_UTESTS = 400L;
static final long NUM_DOCS_ITESTS = 50000L;
+ static final float ACCEPTABLE_EMPTY_SPLITS_PERCENTAGE = 0.5f;
private static final long AVERAGE_DOC_SIZE = 25L;
--
To stop receiving notification emails like this one, please contact
"commits@beam.apache.org" <co...@beam.apache.org>.