You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/11/23 22:28:26 UTC

[GitHub] [arrow] alamb commented on a change in pull request #8734: ARROW-10680: [Rust] [DataFusion] Add partial support for TPC-H query 12

alamb commented on a change in pull request #8734:
URL: https://github.com/apache/arrow/pull/8734#discussion_r529036547



##########
File path: rust/benchmarks/src/bin/tpch.rs
##########
@@ -193,8 +143,105 @@ async fn benchmark(opt: BenchmarkOpt) -> Result<()> {
     Ok(())
 }
 
-async fn execute_sql(ctx: &mut ExecutionContext, sql: &str, debug: bool) -> Result<()> {
-    let plan = ctx.create_logical_plan(sql)?;
+fn create_logical_plan(ctx: &mut ExecutionContext, query: usize) -> Result<LogicalPlan> {
+    match query {
+        1 => ctx.create_logical_plan(
+            "select
+                    l_returnflag,
+                    l_linestatus,
+                    sum(l_quantity),
+                    sum(l_extendedprice),
+                    sum(l_extendedprice * (1 - l_discount)),
+                    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)),
+                    avg(l_quantity),
+                    avg(l_extendedprice),
+                    avg(l_discount),
+                    count(*)
+                from
+                    lineitem
+                where
+                    l_shipdate <= '1998-12-01'
+                group by
+                    l_returnflag,
+                    l_linestatus
+                order by
+                    l_returnflag,
+                    l_linestatus",
+        ),
+
+        12 => {
+            // We do not have sufficient SQL support for this query yet
+
+            // "SELECT
+            //     l_shipmode,
+            //     sum(case
+            //         when o_orderpriority = '1-URGENT'
+            //             OR o_orderpriority = '2-HIGH'
+            //             then 1
+            //         else 0
+            //     end) as high_line_count,
+            //     sum(case
+            //         when o_orderpriority <> '1-URGENT'
+            //             AND o_orderpriority <> '2-HIGH'
+            //             then 1
+            //         else 0
+            //     end) AS low_line_count
+            // FROM
+            //     orders,
+            //     lineitem
+            // WHERE
+            //     o_orderkey = l_orderkey
+            //     AND l_shipmode in ('MAIL', 'SHIP')
+            //     AND l_commitdate < l_receiptdate
+            //     AND l_shipdate < l_commitdate
+            //     AND l_receiptdate >= date '1994-01-01'
+            //     AND l_receiptdate < date '1994-01-01' + interval '1' year
+            // GROUP BY
+            //     l_shipmode
+            // ORDER BY
+            //     l_shipmode"
+
+            Ok(ctx
+                .table("lineitem")?
+                .filter(
+                    col("l_shipmode")
+                        .eq(lit("MAIL"))
+                        .or(col("l_shipmode").eq(lit("SHIP"))),
+                )?
+                .filter(col("l_commitdate").lt(col("l_receiptdate")))?
+                .filter(col("l_shipdate").lt(col("l_commitdate")))?
+                .filter(col("l_receiptdate").gt_eq(lit("1994-01-01")))?
+                // we do not support date functions yet, so faking the "+ interval '1' year" part

Review comment:
       There is `to_timestamp`: https://github.com/apache/arrow/blob/master/rust/datafusion/src/physical_plan/datetime_expressions.rs#L81 but we still need to add support for intervals.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org