You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by GitBox <gi...@apache.org> on 2019/01/15 06:27:13 UTC
[beam] Diff for: [GitHub] kennknowles merged pull request #7504: [BEAM-6427] INTERSECT ALL is not compatible with SQL standard.
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamSetOperatorsTransforms.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamSetOperatorsTransforms.java
index 581fb08e83f8..99f21a358052 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamSetOperatorsTransforms.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamSetOperatorsTransforms.java
@@ -25,6 +25,7 @@
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.Iterators;
/** Collections of {@code PTransform} and {@code DoFn} used to perform Set operations. */
public abstract class BeamSetOperatorsTransforms {
@@ -80,8 +81,14 @@ public void processElement(ProcessContext ctx) {
case INTERSECT:
if (leftRows.iterator().hasNext() && rightRows.iterator().hasNext()) {
if (all) {
- for (Row leftRow : leftRows) {
- ctx.output(leftRow);
+ int leftCount = Iterators.size(leftRows.iterator());
+ int rightCount = Iterators.size(rightRows.iterator());
+
+ // Say for Row R, there are m instances on left and n instances on right,
+ // INTERSECT ALL outputs MIN(m, n) instances of R.
+ Iterator<Row> iter = (leftCount <= rightCount) ? leftRows.iterator() : rightRows.iterator();
+ while (iter.hasNext()) {
+ ctx.output(iter.next());
}
} else {
ctx.output(ctx.element().getKey());
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamIntersectRelTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamIntersectRelTest.java
index 238da34daa17..1bcbed4c206c 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamIntersectRelTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamIntersectRelTest.java
@@ -108,7 +108,7 @@ public void testIntersectAll() {
+ "FROM ORDER_DETAILS2 ";
PCollection<Row> rows = compilePipeline(sql, pipeline);
- PAssert.that(rows).satisfies(new CheckSize(3));
+ PAssert.that(rows).satisfies(new CheckSize(2));
PAssert.that(rows)
.containsInAnyOrder(
@@ -116,16 +116,7 @@ public void testIntersectAll() {
Schema.FieldType.INT64, "order_id",
Schema.FieldType.INT32, "site_id",
Schema.FieldType.DECIMAL, "price")
- .addRows(
- 1L,
- 1,
- new BigDecimal(1.0),
- 1L,
- 1,
- new BigDecimal(1.0),
- 2L,
- 2,
- new BigDecimal(2.0))
+ .addRows(1L, 1, new BigDecimal(1.0), 2L, 2, new BigDecimal(2.0))
.getRows());
pipeline.run();
With regards,
Apache Git Services