Posted to commits@druid.apache.org by GitBox <gi...@apache.org> on 2022/02/03 09:32:23 UTC

[GitHub] [druid] LakshSingla commented on a change in pull request #12163: Add syntax support for PARTITIONED BY/CLUSTERED BY in INSERT queries

LakshSingla commented on a change in pull request #12163:
URL: https://github.com/apache/druid/pull/12163#discussion_r798373441



##########
File path: sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java
##########
@@ -280,6 +283,242 @@ public void testInsertFromExternal()
         .verify();
   }
 
+  @Test
+  public void testInsertWithPartitionBy()
+  {
+    // Test correctness of the query when only the PARTITION BY clause is present
+    RowSignature targetRowSignature = RowSignature.builder()
+                                                  .add("__time", ColumnType.LONG)
+                                                  .add("floor_m1", ColumnType.FLOAT)
+                                                  .add("dim1", ColumnType.STRING)
+                                                  .build();
+
+    Map<String, Object> queryContext = new HashMap<>(DEFAULT_CONTEXT);
+    queryContext.put(QueryContexts.INGESTION_GRANULARITY, "day");
+
+    testInsertQuery()
+        .sql(
+            "INSERT INTO druid.dst SELECT __time, FLOOR(m1) as floor_m1, dim1 FROM foo PARTITION BY 'day'")
+        .expectTarget("dst", targetRowSignature)
+        .expectResources(dataSourceRead("foo"), dataSourceWrite("dst"))
+        .expectQuery(
+            newScanQueryBuilder()
+                .dataSource("foo")
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .columns("__time", "dim1", "v0")
+                .virtualColumns(expressionVirtualColumn("v0", "floor(\"m1\")", ColumnType.FLOAT))
+                .context(queryContext)
+                .build()
+        )
+        .verify();
+  }
+
+  @Test
+  public void testInsertWithClusterBy()
+  {
+    // Test correctness of the query when only the CLUSTER BY clause is present
+    RowSignature targetRowSignature = RowSignature.builder()
+                                                  .add("__time", ColumnType.LONG)
+                                                  .add("floor_m1", ColumnType.FLOAT)
+                                                  .add("dim1", ColumnType.STRING)
+                                                  .add("EXPR$3", ColumnType.DOUBLE)
+                                                  .build();
+    testInsertQuery()
+        .sql(
+            "INSERT INTO druid.dst "
+            + "SELECT __time, FLOOR(m1) as floor_m1, dim1, CEIL(m2) FROM foo "
+            + "CLUSTER BY 2, dim1 DESC, CEIL(m2)"
+        )
+        .expectTarget("dst", targetRowSignature)
+        .expectResources(dataSourceRead("foo"), dataSourceWrite("dst"))
+        .expectQuery(
+            newScanQueryBuilder()
+                .dataSource("foo")
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .columns("__time", "dim1", "v0", "v1")
+                .virtualColumns(
+                    expressionVirtualColumn("v0", "floor(\"m1\")", ColumnType.FLOAT),
+                    expressionVirtualColumn("v1", "ceil(\"m2\")", ColumnType.DOUBLE)
+                )
+                .orderBy(
+                    ImmutableList.of(
+                        new ScanQuery.OrderBy("v0", ScanQuery.Order.ASCENDING),
+                        new ScanQuery.OrderBy("dim1", ScanQuery.Order.DESCENDING),
+                        new ScanQuery.OrderBy("v1", ScanQuery.Order.ASCENDING)
+                    )
+                )
+                .build()
+        )
+        .verify();
+  }
+
+  @Test
+  public void testInsertWithPartitionByAndClusterBy()
+  {
+    // Test correctness of the query when both PARTITION BY and CLUSTER BY clauses are present
+    RowSignature targetRowSignature = RowSignature.builder()
+                                                  .add("__time", ColumnType.LONG)
+                                                  .add("floor_m1", ColumnType.FLOAT)
+                                                  .add("dim1", ColumnType.STRING)
+                                                  .build();
+
+    Map<String, Object> queryContext = new HashMap<>(DEFAULT_CONTEXT);
+    queryContext.put(QueryContexts.INGESTION_GRANULARITY, "day");
+
+    testInsertQuery()
+        .sql(
+            "INSERT INTO druid.dst SELECT __time, FLOOR(m1) as floor_m1, dim1 FROM foo PARTITION BY 'day' CLUSTER BY 2, dim1")
+        .expectTarget("dst", targetRowSignature)
+        .expectResources(dataSourceRead("foo"), dataSourceWrite("dst"))
+        .expectQuery(
+            newScanQueryBuilder()
+                .dataSource("foo")
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .columns("__time", "dim1", "v0")
+                .virtualColumns(expressionVirtualColumn("v0", "floor(\"m1\")", ColumnType.FLOAT))
+                .orderBy(
+                    ImmutableList.of(
+                        new ScanQuery.OrderBy("v0", ScanQuery.Order.ASCENDING),
+                        new ScanQuery.OrderBy("dim1", ScanQuery.Order.ASCENDING)
+                    )
+                )
+                .context(queryContext)
+                .build()
+        )
+        .verify();
+  }
+
+  @Test
+  public void testInsertWithPartitionByAndLimitOffset()
+  {
+    RowSignature targetRowSignature = RowSignature.builder()
+                                                  .add("__time", ColumnType.LONG)
+                                                  .add("floor_m1", ColumnType.FLOAT)
+                                                  .add("dim1", ColumnType.STRING)
+                                                  .build();
+
+    Map<String, Object> queryContext = new HashMap<>(DEFAULT_CONTEXT);
+    queryContext.put(QueryContexts.INGESTION_GRANULARITY, "day");
+
+    testInsertQuery()
+        .sql(
+            "INSERT INTO druid.dst SELECT __time, FLOOR(m1) as floor_m1, dim1 FROM foo LIMIT 10 OFFSET 20 PARTITION BY 'day'")
+        .expectTarget("dst", targetRowSignature)
+        .expectResources(dataSourceRead("foo"), dataSourceWrite("dst"))
+        .expectQuery(
+            newScanQueryBuilder()
+                .dataSource("foo")
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .columns("__time", "dim1", "v0")
+                .virtualColumns(expressionVirtualColumn("v0", "floor(\"m1\")", ColumnType.FLOAT))
+                .limit(10)
+                .offset(20)
+                .context(queryContext)
+                .build()
+        )
+        .verify();
+  }
+
+  @Test
+  public void testInsertWithPartitionByAndOrderBy()
+  {
+    RowSignature targetRowSignature = RowSignature.builder()
+                                                  .add("__time", ColumnType.LONG)
+                                                  .add("floor_m1", ColumnType.FLOAT)
+                                                  .add("dim1", ColumnType.STRING)
+                                                  .build();
+
+    Map<String, Object> queryContext = new HashMap<>(DEFAULT_CONTEXT);
+    queryContext.put(QueryContexts.INGESTION_GRANULARITY, "day");
+
+    testInsertQuery()
+        .sql(
+            "INSERT INTO druid.dst SELECT __time, FLOOR(m1) as floor_m1, dim1 FROM foo ORDER BY 2, dim1 PARTITION BY 'day'")
+        .expectTarget("dst", targetRowSignature)
+        .expectResources(dataSourceRead("foo"), dataSourceWrite("dst"))
+        .expectQuery(
+            newScanQueryBuilder()
+                .dataSource("foo")
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .columns("__time", "dim1", "v0")
+                .virtualColumns(expressionVirtualColumn("v0", "floor(\"m1\")", ColumnType.FLOAT))
+                .orderBy(
+                    ImmutableList.of(
+                        new ScanQuery.OrderBy("v0", ScanQuery.Order.ASCENDING),
+                        new ScanQuery.OrderBy("dim1", ScanQuery.Order.ASCENDING)
+                    )
+                )
+                .context(queryContext)
+                .build()
+        )
+        .verify();
+  }
+
+  @Test
+  public void testInsertWithClusterByAndOrderBy() throws Exception
+  {
+    try {
+      testQuery(
+          StringUtils.format(
+              "INSERT INTO dst SELECT * FROM %s ORDER BY 2 CLUSTER BY 3",
+              externSql(externalDataSource)
+          ),
+          ImmutableList.of(),
+          ImmutableList.of()
+      );
+      Assert.fail("Exception should be thrown");
+    }
+    catch (SqlPlanningException e) {
+      Assert.assertEquals(
+          "Cannot have both ORDER BY and CLUSTER BY clauses in the same INSERT query",
+          e.getMessage()
+      );
+    }
+    didTest = true;
+  }
+
+  @Test
+  public void testInsertWithPartitionByContainingInvalidGranularity() throws Exception
+  {
+    // Throws a ValidationException, which gets converted to a SqlPlanningException before being thrown to the end user
+    try {
+      testQuery(

Review comment:
       This works only after the SQL planning has been done. We want to catch the errors before that, so this approach won't work here.
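
       As a side note on the shape of these negative tests (a sketch only, not code from this PR): with JUnit 4.13+, the try/catch/Assert.fail pattern in testInsertWithClusterByAndOrderBy could be written with Assert.assertThrows, which returns the thrown exception for inspection. The availability of assertThrows in this codebase is an assumption; the SQL and expected message are copied from that test.

           // Sketch, assuming JUnit 4.13+ (org.junit.Assert.assertThrows) and the
           // harness methods already used in this file (testQuery, externSql).
           SqlPlanningException e = Assert.assertThrows(
               SqlPlanningException.class,
               () -> testQuery(
                   StringUtils.format(
                       "INSERT INTO dst SELECT * FROM %s ORDER BY 2 CLUSTER BY 3",
                       externSql(externalDataSource)
                   ),
                   ImmutableList.of(),
                   ImmutableList.of()
               )
           );
           Assert.assertEquals(
               "Cannot have both ORDER BY and CLUSTER BY clauses in the same INSERT query",
               e.getMessage()
           );
           didTest = true;

       This keeps the assertion on the exception type in one place and avoids a silent pass if Assert.fail is accidentally dropped.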




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


