You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by fj...@apache.org on 2019/07/04 06:03:57 UTC

[incubator-druid] branch master updated: multi-value string expression transformation fix (#8019)

This is an automated email from the ASF dual-hosted git repository.

fjy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git


The following commit(s) were added to refs/heads/master by this push:
     new e6ba258  multi-value string expression transformation fix (#8019)
e6ba258 is described below

commit e6ba258197468bfc8691b939fe00bd378139a095
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Wed Jul 3 23:03:47 2019 -0700

    multi-value string expression transformation fix (#8019)
    
    * multi-value string expression transformation fix
    
    * fixes
    
    * more docs and test
    
    * revert unintended doc change
    
    * formatting
    
    * change tostring to print binding identifier
    
    * review fixup
    
    * oops
---
 .../main/java/org/apache/druid/math/expr/Expr.java | 266 +++++++++++++--------
 .../apache/druid/math/expr/ExprListenerImpl.java   |  72 +++++-
 .../org/apache/druid/math/expr/ExprMacroTable.java |  17 +-
 .../java/org/apache/druid/math/expr/Parser.java    | 105 +++++---
 .../org/apache/druid/math/expr/ParserTest.java     |  59 ++++-
 .../aggregation/SimpleDoubleAggregatorFactory.java |   2 +-
 .../aggregation/SimpleFloatAggregatorFactory.java  |   2 +-
 .../aggregation/SimpleLongAggregatorFactory.java   |   2 +-
 .../aggregation/post/ExpressionPostAggregator.java |   2 +-
 .../druid/query/expression/TrimExprMacro.java      |  16 +-
 .../druid/query/filter/ExpressionDimFilter.java    |   2 +-
 .../druid/segment/filter/ExpressionFilter.java     |   2 +-
 .../druid/segment/virtual/ExpressionSelectors.java |  16 +-
 .../segment/virtual/ExpressionVirtualColumn.java   |   2 +-
 .../RowBasedExpressionColumnValueSelector.java     |   2 +-
 ...gInputCachingExpressionColumnValueSelector.java |   2 +-
 ...gInputCachingExpressionColumnValueSelector.java |   2 +-
 .../SingleStringInputDimensionSelector.java        |   2 +-
 .../druid/query/MultiValuedDimensionTest.java      |  48 ++++
 .../virtual/ExpressionVirtualColumnTest.java       |   2 +-
 .../apache/druid/sql/calcite/CalciteQueryTest.java | 114 +++++----
 21 files changed, 500 insertions(+), 237 deletions(-)

diff --git a/core/src/main/java/org/apache/druid/math/expr/Expr.java b/core/src/main/java/org/apache/druid/math/expr/Expr.java
index 271586b..07c20fa 100644
--- a/core/src/main/java/org/apache/druid/math/expr/Expr.java
+++ b/core/src/main/java/org/apache/druid/math/expr/Expr.java
@@ -32,8 +32,8 @@ import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.guava.Comparators;
 
 import javax.annotation.Nullable;
+import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
@@ -71,6 +71,15 @@ public interface Expr
   }
 
   /**
+   * Returns an {@link IdentifierExpr} if it is one, else null
+   */
+  @Nullable
+  default IdentifierExpr getIdentifierExprIfIdentifierExpr()
+  {
+    return null;
+  }
+
+  /**
    * Returns the string identifier of an {@link IdentifierExpr}, else null
    */
   @Nullable
@@ -81,6 +90,17 @@ public interface Expr
   }
 
   /**
+   * Returns the string identifier to use to get a value from {@link Expr.ObjectBinding} of an {@link IdentifierExpr},
+   * else null
+   */
+  @Nullable
+  default String getIdentifierBindingIfIdentifier()
+  {
+    // overridden by things that are identifiers
+    return null;
+  }
+
+  /**
    * Evaluate the {@link Expr} with the bindings which supply {@link IdentifierExpr} with their values, producing an
    * {@link ExprEval} with the result.
    */
@@ -88,7 +108,7 @@ public interface Expr
 
   /**
    * Programmatically inspect the {@link Expr} tree with a {@link Visitor}. Each {@link Expr} is responsible for
-   * ensuring the {@link Visitor} can visit all of its {@link Expr} children before visiting itself.
+   * ensuring the {@link Visitor} can visit all of its {@link Expr} children before visiting itself
    */
   void visit(Visitor visitor);
 
@@ -147,92 +167,157 @@ public interface Expr
    */
   class BindingDetails
   {
-    private final ImmutableSet<String> freeVariables;
-    private final ImmutableSet<String> scalarVariables;
-    private final ImmutableSet<String> arrayVariables;
+    private final ImmutableSet<IdentifierExpr> freeVariables;
+    private final ImmutableSet<IdentifierExpr> scalarVariables;
+    private final ImmutableSet<IdentifierExpr> arrayVariables;
 
     public BindingDetails()
     {
-      this(Collections.emptySet(), Collections.emptySet(), Collections.emptySet());
+      this(ImmutableSet.of(), ImmutableSet.of(), ImmutableSet.of());
     }
 
-    public BindingDetails(String identifier)
+    public BindingDetails(IdentifierExpr expr)
     {
-      this(ImmutableSet.of(identifier), Collections.emptySet(), Collections.emptySet());
+      this(ImmutableSet.of(expr), ImmutableSet.of(), ImmutableSet.of());
     }
 
-    public BindingDetails(Set<String> freeVariables, Set<String> scalarVariables, Set<String> arrayVariables)
+    public BindingDetails(
+        ImmutableSet<IdentifierExpr> freeVariables,
+        ImmutableSet<IdentifierExpr> scalarVariables,
+        ImmutableSet<IdentifierExpr> arrayVariables
+    )
     {
-      this.freeVariables = ImmutableSet.copyOf(freeVariables);
-      this.scalarVariables = ImmutableSet.copyOf(scalarVariables);
-      this.arrayVariables = ImmutableSet.copyOf(arrayVariables);
+      this.freeVariables = freeVariables;
+      this.scalarVariables = scalarVariables;
+      this.arrayVariables = arrayVariables;
     }
 
     /**
      * Get the list of required column inputs to evaluate an expression
      */
-    public ImmutableList<String> getRequiredColumns()
+    public List<String> getRequiredColumnsList()
+    {
+      return new ArrayList<>(freeVariables.stream().map(IdentifierExpr::getIdentifierBindingIfIdentifier).collect(Collectors.toSet()));
+    }
+
+    /**
+     * Get the set of required column inputs to evaluate an expression, {@link IdentifierExpr#bindingIdentifier}
+     */
+    public Set<String> getRequiredColumns()
+    {
+      return freeVariables.stream().map(IdentifierExpr::getIdentifierBindingIfIdentifier).collect(Collectors.toSet());
+    }
+
+    /**
+     * Set of {@link IdentifierExpr#bindingIdentifier} which are used with scalar operators and functions
+     */
+    public Set<String> getScalarColumns()
     {
-      return ImmutableList.copyOf(freeVariables);
+      return scalarVariables.stream().map(IdentifierExpr::getIdentifierBindingIfIdentifier).collect(Collectors.toSet());
+    }
+
+    /**
+     * Set of {@link IdentifierExpr#bindingIdentifier} which are used with array typed functions and apply functions.
+     */
+    public Set<String> getArrayColumns()
+    {
+      return arrayVariables.stream().map(IdentifierExpr::getIdentifierBindingIfIdentifier).collect(Collectors.toSet());
     }
 
     /**
      * Total set of 'free' identifiers of an {@link Expr}, that are not supplied by a {@link LambdaExpr} binding
      */
-    public ImmutableSet<String> getFreeVariables()
+    public Set<IdentifierExpr> getFreeVariables()
     {
       return freeVariables;
     }
 
     /**
-     * Set of identifiers which are used with scalar operators and functions
+     * Set of {@link IdentifierExpr#identifier} which are used with scalar operators and functions.
      */
-    public ImmutableSet<String> getScalarVariables()
+    public Set<String> getScalarVariables()
     {
-      return scalarVariables;
+      return scalarVariables.stream().map(IdentifierExpr::getIdentifier).collect(Collectors.toSet());
     }
 
     /**
-     * Set of identifiers which are used with array typed functions and apply functions.
+     * Set of {@link IdentifierExpr#identifier} which are used with array typed functions and apply functions.
      */
-    public ImmutableSet<String> getArrayVariables()
+    public Set<String> getArrayVariables()
     {
-      return arrayVariables;
+      return arrayVariables.stream().map(IdentifierExpr::getIdentifier).collect(Collectors.toSet());
     }
 
-    public BindingDetails merge(BindingDetails other)
+    /**
+     * Combine with {@link BindingDetails} from {@link Expr#analyzeInputs()}
+     */
+    public BindingDetails with(Expr other)
     {
-      return new BindingDetails(
-          Sets.union(freeVariables, other.freeVariables),
-          Sets.union(scalarVariables, other.scalarVariables),
-          Sets.union(arrayVariables, other.arrayVariables)
-      );
+      return with(other.analyzeInputs());
     }
 
-    public BindingDetails mergeWith(Set<String> moreScalars, Set<String> moreArrays)
+    /**
+     * Combine (union) another {@link BindingDetails}
+     */
+    public BindingDetails with(BindingDetails other)
     {
       return new BindingDetails(
-          Sets.union(freeVariables, Sets.union(moreScalars, moreArrays)),
-          Sets.union(scalarVariables, moreScalars),
-          Sets.union(arrayVariables, moreArrays)
+          ImmutableSet.copyOf(Sets.union(freeVariables, other.freeVariables)),
+          ImmutableSet.copyOf(Sets.union(scalarVariables, other.scalarVariables)),
+          ImmutableSet.copyOf(Sets.union(arrayVariables, other.arrayVariables))
       );
     }
 
-    public BindingDetails mergeWithScalars(Set<String> moreScalars)
+    /**
+     * Add set of arguments as {@link BindingDetails#scalarVariables} that are *directly* {@link IdentifierExpr},
+     * else they are ignored.
+     */
+    public BindingDetails withScalarArguments(Set<Expr> scalarArguments)
     {
+      Set<IdentifierExpr> moreScalars = new HashSet<>();
+      for (Expr expr : scalarArguments) {
+        final IdentifierExpr stringIdentifier = expr.getIdentifierExprIfIdentifierExpr();
+        if (stringIdentifier != null) {
+          moreScalars.add((IdentifierExpr) expr);
+        }
+      }
       return new BindingDetails(
-          Sets.union(freeVariables, moreScalars),
-          Sets.union(scalarVariables, moreScalars),
+          ImmutableSet.copyOf(Sets.union(freeVariables, moreScalars)),
+          ImmutableSet.copyOf(Sets.union(scalarVariables, moreScalars)),
           arrayVariables
       );
     }
 
-    public BindingDetails mergeWithArrays(Set<String> moreArrays)
+    /**
+     * Add set of arguments as {@link BindingDetails#arrayVariables} that are *directly* {@link IdentifierExpr},
+     * else they are ignored.
+     */
+    public BindingDetails withArrayArguments(Set<Expr> arrayArguments)
     {
+      Set<IdentifierExpr> arrayIdentifiers = new HashSet<>();
+      for (Expr expr : arrayArguments) {
+        final IdentifierExpr isIdentifier = expr.getIdentifierExprIfIdentifierExpr();
+        if (isIdentifier != null) {
+          arrayIdentifiers.add((IdentifierExpr) expr);
+        }
+      }
       return new BindingDetails(
-          Sets.union(freeVariables, moreArrays),
+          ImmutableSet.copyOf(Sets.union(freeVariables, arrayIdentifiers)),
           scalarVariables,
-          Sets.union(arrayVariables, moreArrays)
+          ImmutableSet.copyOf(Sets.union(arrayVariables, arrayIdentifiers))
+      );
+    }
+
+    /**
+     * Remove any {@link IdentifierExpr} that are from a {@link LambdaExpr}, since the {@link ApplyFunction} will
+     * provide bindings for these variables.
+     */
+    public BindingDetails removeLambdaArguments(Set<String> lambda)
+    {
+      return new BindingDetails(
+        ImmutableSet.copyOf(freeVariables.stream().filter(x -> !lambda.contains(x.getIdentifier())).iterator()),
+        ImmutableSet.copyOf(scalarVariables.stream().filter(x -> !lambda.contains(x.getIdentifier())).iterator()),
+        ImmutableSet.copyOf(arrayVariables.stream().filter(x -> !lambda.contains(x.getIdentifier())).iterator())
       );
     }
   }
@@ -482,18 +567,44 @@ class DoubleArrayExpr extends ConstantExpr
 class IdentifierExpr implements Expr
 {
   private final String identifier;
+  private final String bindingIdentifier;
 
   IdentifierExpr(String value)
   {
     this.identifier = value;
+    this.bindingIdentifier = value;
+  }
+
+  IdentifierExpr(String identifier, String bindingIdentifier)
+  {
+    this.identifier = identifier;
+    this.bindingIdentifier = bindingIdentifier;
   }
 
   @Override
   public String toString()
   {
+    return bindingIdentifier;
+  }
+
+  /**
+   * Unique identifier
+   */
+  @Nullable
+  public String getIdentifier()
+  {
     return identifier;
   }
 
+  /**
+   * Value binding identifier
+   */
+  @Nullable
+  public String getBindingIdentifier()
+  {
+    return bindingIdentifier;
+  }
+
   @Nullable
   @Override
   public String getIdentifierIfIdentifier()
@@ -501,16 +612,30 @@ class IdentifierExpr implements Expr
     return identifier;
   }
 
+  @Nullable
+  @Override
+  public String getIdentifierBindingIfIdentifier()
+  {
+    return bindingIdentifier;
+  }
+
+  @Nullable
+  @Override
+  public IdentifierExpr getIdentifierExprIfIdentifierExpr()
+  {
+    return this;
+  }
+
   @Override
   public BindingDetails analyzeInputs()
   {
-    return new BindingDetails(identifier);
+    return new BindingDetails(this);
   }
 
   @Override
   public ExprEval eval(ObjectBinding bindings)
   {
-    return ExprEval.bestEffortOf(bindings.get(identifier));
+    return ExprEval.bestEffortOf(bindings.get(bindingIdentifier));
   }
 
   @Override
@@ -600,11 +725,7 @@ class LambdaExpr implements Expr
   {
     final Set<String> lambdaArgs = args.stream().map(IdentifierExpr::toString).collect(Collectors.toSet());
     BindingDetails bodyDetails = expr.analyzeInputs();
-    return new BindingDetails(
-        Sets.difference(bodyDetails.getFreeVariables(), lambdaArgs),
-        Sets.difference(bodyDetails.getScalarVariables(), lambdaArgs),
-        Sets.difference(bodyDetails.getArrayVariables(), lambdaArgs)
-    );
+    return bodyDetails.removeLambdaArguments(lambdaArgs);
   }
 }
 
@@ -658,28 +779,13 @@ class FunctionExpr implements Expr
   @Override
   public BindingDetails analyzeInputs()
   {
-    final Set<String> scalarVariables = new HashSet<>();
-    final Set<String> arrayVariables = new HashSet<>();
-    final Set<Expr> scalarArgs = function.getScalarInputs(args);
-    final Set<Expr> arrayArgs = function.getArrayInputs(args);
     BindingDetails accumulator = new BindingDetails();
 
     for (Expr arg : args) {
-      accumulator = accumulator.merge(arg.analyzeInputs());
-    }
-    for (Expr arg : scalarArgs) {
-      String s = arg.getIdentifierIfIdentifier();
-      if (s != null) {
-        scalarVariables.add(s);
-      }
+      accumulator = accumulator.with(arg);
     }
-    for (Expr arg : arrayArgs) {
-      String s = arg.getIdentifierIfIdentifier();
-      if (s != null) {
-        arrayVariables.add(s);
-      }
-    }
-    return accumulator.mergeWith(scalarVariables, arrayVariables);
+    return accumulator.withScalarArguments(function.getScalarInputs(args))
+                      .withArrayArguments(function.getArrayInputs(args));
   }
 }
 
@@ -714,21 +820,11 @@ class ApplyFunctionExpr implements Expr
     for (Expr arg : argsExpr) {
       BindingDetails argDetails = arg.analyzeInputs();
       argBindingDetailsBuilder.add(argDetails);
-      accumulator = accumulator.merge(argDetails);
-    }
-
-    final Set<String> arrayVariables = new HashSet<>();
-    Set<Expr> arrayArgs = function.getArrayInputs(argsExpr);
-
-    for (Expr arg : arrayArgs) {
-      String s = arg.getIdentifierIfIdentifier();
-      if (s != null) {
-        arrayVariables.add(s);
-      }
+      accumulator = accumulator.with(argDetails);
     }
 
     lambdaBindingDetails = lambdaExpr.analyzeInputs();
-    bindingDetails = accumulator.merge(lambdaBindingDetails).mergeWithArrays(arrayVariables);
+    bindingDetails = accumulator.with(lambdaBindingDetails).withArrayArguments(function.getArrayInputs(argsExpr));
     argsBindingDetails = argBindingDetailsBuilder.build();
   }
 
@@ -804,14 +900,7 @@ abstract class UnaryExpr implements Expr
   public BindingDetails analyzeInputs()
   {
     // currently all unary operators only operate on scalar inputs
-    final Set<String> scalars;
-    final String identifierMaybe = expr.getIdentifierIfIdentifier();
-    if (identifierMaybe != null) {
-      scalars = ImmutableSet.of(identifierMaybe);
-    } else {
-      scalars = Collections.emptySet();
-    }
-    return expr.analyzeInputs().mergeWithScalars(scalars);
+    return expr.analyzeInputs().withScalarArguments(ImmutableSet.of(expr));
   }
 }
 
@@ -934,18 +1023,7 @@ abstract class BinaryOpExprBase implements Expr
   public BindingDetails analyzeInputs()
   {
     // currently all binary operators operate on scalar inputs
-    final Set<String> scalars = new HashSet<>();
-    final String leftIdentifer = left.getIdentifierIfIdentifier();
-    final String rightIdentifier = right.getIdentifierIfIdentifier();
-    if (leftIdentifer != null) {
-      scalars.add(leftIdentifer);
-    }
-    if (rightIdentifier != null) {
-      scalars.add(rightIdentifier);
-    }
-    return left.analyzeInputs()
-               .merge(right.analyzeInputs())
-               .mergeWithScalars(scalars);
+    return left.analyzeInputs().with(right).withScalarArguments(ImmutableSet.of(left, right));
   }
 }
 
diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java
index b4dc961..b4c91a3 100644
--- a/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java
+++ b/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java
@@ -24,14 +24,17 @@ import org.antlr.v4.runtime.tree.TerminalNode;
 import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.druid.annotations.UsedInGeneratedCode;
 import org.apache.druid.java.util.common.RE;
+import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.math.expr.antlr.ExprBaseListener;
 import org.apache.druid.math.expr.antlr.ExprParser;
 
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 /**
  * Implementation of antlr parse tree listener, transforms {@link ParseTree} to {@link Expr}, based on the grammar
@@ -44,11 +47,17 @@ public class ExprListenerImpl extends ExprBaseListener
   private final ExprMacroTable macroTable;
   private final ParseTree rootNodeKey;
 
+  private final Set<String> lambdaIdentifiers;
+  private final Set<String> uniqueIdentifiers;
+  private int uniqueCounter = 0;
+
   ExprListenerImpl(ParseTree rootNodeKey, ExprMacroTable macroTable)
   {
     this.rootNodeKey = rootNodeKey;
     this.macroTable = macroTable;
     this.nodes = new HashMap<>();
+    this.lambdaIdentifiers = new HashSet<>();
+    this.uniqueIdentifiers = new HashSet<>();
   }
 
   Expr getAST()
@@ -347,14 +356,19 @@ public class ExprListenerImpl extends ExprBaseListener
   @Override
   public void exitIdentifierExpr(ExprParser.IdentifierExprContext ctx)
   {
-    String text = ctx.getText();
-    if (text.charAt(0) == '"' && text.charAt(text.length() - 1) == '"') {
-      text = StringEscapeUtils.unescapeJava(text.substring(1, text.length() - 1));
+    final String text = sanitizeIdentifierString(ctx.getText());
+    nodes.put(ctx, createIdentifierExpr(text));
+  }
+
+  @Override
+  public void enterLambda(ExprParser.LambdaContext ctx)
+  {
+    // mark lambda identifiers on enter, for reference later when creating the IdentifierExpr inside of the lambdas
+    for (int i = 0; i < ctx.IDENTIFIER().size(); i++) {
+      String text = ctx.IDENTIFIER(i).getText();
+      text = sanitizeIdentifierString(text);
+      this.lambdaIdentifiers.add(text);
     }
-    nodes.put(
-        ctx,
-        new IdentifierExpr(text)
-    );
   }
 
   @Override
@@ -363,10 +377,10 @@ public class ExprListenerImpl extends ExprBaseListener
     List<IdentifierExpr> identifiers = new ArrayList<>(ctx.IDENTIFIER().size());
     for (int i = 0; i < ctx.IDENTIFIER().size(); i++) {
       String text = ctx.IDENTIFIER(i).getText();
-      if (text.charAt(0) == '"' && text.charAt(text.length() - 1) == '"') {
-        text = StringEscapeUtils.unescapeJava(text.substring(1, text.length() - 1));
-      }
-      identifiers.add(i, new IdentifierExpr(text));
+      text = sanitizeIdentifierString(text);
+      identifiers.add(i, createIdentifierExpr(text));
+      // clean up lambda identifier references on exit
+      lambdaIdentifiers.remove(text);
     }
 
     nodes.put(ctx, new LambdaExpr(identifiers, (Expr) nodes.get(ctx.expr())));
@@ -405,6 +419,42 @@ public class ExprListenerImpl extends ExprBaseListener
     nodes.put(ctx, new StringArrayExpr(new String[0]));
   }
 
+  /**
+   * All {@link IdentifierExpr} that are *not* bound to a {@link LambdaExpr} identifier, will recieve a unique
+   * {@link IdentifierExpr#identifier} value which may or may not be the same as the
+   * {@link IdentifierExpr#bindingIdentifier} value. {@link LambdaExpr} identifiers however, will always have
+   * {@link IdentifierExpr#identifier} be the same as {@link IdentifierExpr#bindingIdentifier} because they have
+   * synthetic bindings set at evaluation time. This is done to aid in analysis needed for the automatic expression
+   * translation which maps scalar expressions to multi-value inputs. See
+   * {@link Parser#applyUnappliedIdentifiers(Expr, Expr.BindingDetails, List)}} for additional details.
+   */
+  private IdentifierExpr createIdentifierExpr(String binding)
+  {
+    if (!lambdaIdentifiers.contains(binding)) {
+      String uniqueIdentifier = binding;
+      while (uniqueIdentifiers.contains(uniqueIdentifier)) {
+        uniqueIdentifier = StringUtils.format("%s_%s", binding, uniqueCounter++);
+      }
+      uniqueIdentifiers.add(uniqueIdentifier);
+      return new IdentifierExpr(uniqueIdentifier, binding);
+    }
+    return new IdentifierExpr(binding);
+  }
+
+  /**
+   * Remove double quotes from an identifier variable string, returning unqouted identifier
+   */
+  private static String sanitizeIdentifierString(String text)
+  {
+    if (text.charAt(0) == '"' && text.charAt(text.length() - 1) == '"') {
+      text = StringEscapeUtils.unescapeJava(text.substring(1, text.length() - 1));
+    }
+    return text;
+  }
+
+  /**
+   * Remove single quote from a string literal, returning unquoted string value
+   */
   private static String escapeStringLiteral(String text)
   {
     String unquoted = text.substring(1, text.length() - 1);
diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java b/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java
index 370c5a1..fce44f4 100644
--- a/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java
+++ b/core/src/main/java/org/apache/druid/math/expr/ExprMacroTable.java
@@ -112,11 +112,7 @@ public class ExprMacroTable
     @Override
     public BindingDetails analyzeInputs()
     {
-      final String identifier = arg.getIdentifierIfIdentifier();
-      if (identifier == null) {
-        return arg.analyzeInputs();
-      }
-      return arg.analyzeInputs().mergeWithScalars(ImmutableSet.of(identifier));
+      return arg.analyzeInputs().withScalarArguments(ImmutableSet.of(arg));
     }
   }
 
@@ -145,16 +141,13 @@ public class ExprMacroTable
     @Override
     public BindingDetails analyzeInputs()
     {
-      Set<String> scalars = new HashSet<>();
+      final Set<Expr> argSet = new HashSet<>(args.size());
       BindingDetails accumulator = new BindingDetails();
       for (Expr arg : args) {
-        final String identifier = arg.getIdentifierIfIdentifier();
-        if (identifier != null) {
-          scalars.add(identifier);
-        }
-        accumulator = accumulator.merge(arg.analyzeInputs());
+        accumulator = accumulator.with(arg);
+        argSet.add(arg);
       }
-      return accumulator.mergeWithScalars(scalars);
+      return accumulator.withScalarArguments(argSet);
     }
   }
 }
diff --git a/core/src/main/java/org/apache/druid/math/expr/Parser.java b/core/src/main/java/org/apache/druid/math/expr/Parser.java
index 59ed0dc..55144ab 100644
--- a/core/src/main/java/org/apache/druid/math/expr/Parser.java
+++ b/core/src/main/java/org/apache/druid/math/expr/Parser.java
@@ -171,13 +171,9 @@ public class Parser
       return expr;
     }
     List<String> unapplied = toApply.stream()
-                                     .filter(x -> bindingDetails.getFreeVariables().contains(x))
+                                     .filter(x -> bindingDetails.getRequiredColumns().contains(x))
                                      .collect(Collectors.toList());
 
-    ApplyFunction fn;
-    final LambdaExpr lambdaExpr;
-    final List<Expr> args;
-
     // any unapplied identifiers that are inside a lambda expression need that lambda expression to be rewritten
     Expr newExpr = expr.visit(
         childExpr -> {
@@ -215,26 +211,52 @@ public class Parser
       return newExpr;
     }
 
-    // else, it *should be safe* to wrap in either map or cartesian_map because we still have missing bindings that
-    // were *not* referenced in a lambda body
-    if (remainingUnappliedArgs.size() == 1) {
+    return applyUnapplied(newExpr, remainingUnappliedArgs);
+  }
+
+  /**
+   * translate an {@link Expr} into an {@link ApplyFunctionExpr} for {@link ApplyFunction.MapFunction} or
+   * {@link ApplyFunction.CartesianMapFunction} if there are multiple unbound arguments to be applied
+   */
+  private static Expr applyUnapplied(Expr expr, List<String> unapplied)
+  {
+    // wrap an expression in either map or cartesian_map to apply any unapplied identifiers
+    final Map<IdentifierExpr, IdentifierExpr> toReplace = new HashMap<>();
+    final List<IdentifierExpr> args = expr.analyzeInputs()
+                                          .getFreeVariables()
+                                          .stream()
+                                          .filter(x -> unapplied.contains(x.getBindingIdentifier()))
+                                          .collect(Collectors.toList());
+
+    final List<IdentifierExpr> lambdaArgs = new ArrayList<>();
+
+    // construct lambda args from list of args to apply
+    for (IdentifierExpr applyFnArg : args) {
+      IdentifierExpr lambdaRewrite = new IdentifierExpr(applyFnArg.getIdentifier());
+      lambdaArgs.add(lambdaRewrite);
+      toReplace.put(applyFnArg, lambdaRewrite);
+    }
+
+    // rewrite identifiers in the expression which will become the lambda body, so they match the lambda identifiers we
+    // are constructing
+    Expr newExpr = expr.visit(childExpr -> {
+      if (childExpr instanceof IdentifierExpr) {
+        if (toReplace.containsKey(childExpr)) {
+          return toReplace.get(childExpr);
+        }
+      }
+      return childExpr;
+    });
+
+    final LambdaExpr lambdaExpr = new LambdaExpr(lambdaArgs, newExpr);
+    final ApplyFunction fn;
+    if (args.size() == 1) {
       fn = new ApplyFunction.MapFunction();
-      IdentifierExpr lambdaArg = new IdentifierExpr(remainingUnappliedArgs.iterator().next());
-      lambdaExpr = new LambdaExpr(ImmutableList.of(lambdaArg), newExpr);
-      args = ImmutableList.of(lambdaArg);
     } else {
       fn = new ApplyFunction.CartesianMapFunction();
-      List<IdentifierExpr> identifiers = new ArrayList<>(remainingUnappliedArgs.size());
-      args = new ArrayList<>(remainingUnappliedArgs.size());
-      for (String remainingUnappliedArg : remainingUnappliedArgs) {
-        IdentifierExpr arg = new IdentifierExpr(remainingUnappliedArg);
-        identifiers.add(arg);
-        args.add(arg);
-      }
-      lambdaExpr = new LambdaExpr(identifiers, newExpr);
     }
 
-    Expr magic = new ApplyFunctionExpr(fn, fn.name(), lambdaExpr, args);
+    final Expr magic = new ApplyFunctionExpr(fn, fn.name(), lambdaExpr, ImmutableList.copyOf(args));
     return magic;
   }
 
@@ -249,28 +271,38 @@ public class Parser
    */
   private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List<String> unappliedArgs)
   {
-
     // recursively evaluate arguments to ensure they are properly transformed into arrays as necessary
-    List<String> unappliedInThisApply =
+    Set<String> unappliedInThisApply =
         unappliedArgs.stream()
-                     .filter(u -> !expr.bindingDetails.getArrayVariables().contains(u))
-                     .collect(Collectors.toList());
+                     .filter(u -> !expr.bindingDetails.getArrayColumns().contains(u))
+                     .collect(Collectors.toSet());
+
+    List<String> unappliedIdentifiers =
+        expr.bindingDetails
+            .getFreeVariables()
+            .stream()
+            .filter(x -> unappliedInThisApply.contains(x.getIdentifierBindingIfIdentifier()))
+            .map(IdentifierExpr::getIdentifierIfIdentifier)
+            .collect(Collectors.toList());
 
     List<Expr> newArgs = new ArrayList<>();
     for (int i = 0; i < expr.argsExpr.size(); i++) {
-      newArgs.add(applyUnappliedIdentifiers(
-          expr.argsExpr.get(i),
-          expr.argsBindingDetails.get(i),
-          unappliedInThisApply)
+      newArgs.add(
+          applyUnappliedIdentifiers(
+              expr.argsExpr.get(i),
+              expr.argsBindingDetails.get(i),
+              unappliedIdentifiers
+          )
       );
     }
 
     // this will _not_ include the lambda identifiers.. anything in this list needs to be applied
-    List<IdentifierExpr> unappliedLambdaBindings = expr.lambdaBindingDetails.getFreeVariables()
-                                                         .stream()
-                                                         .filter(unappliedArgs::contains)
-                                                         .map(IdentifierExpr::new)
-                                                         .collect(Collectors.toList());
+    List<IdentifierExpr> unappliedLambdaBindings =
+        expr.lambdaBindingDetails.getFreeVariables()
+                                 .stream()
+                                 .filter(x -> unappliedArgs.contains(x.getIdentifierBindingIfIdentifier()))
+                                 .map(x -> new IdentifierExpr(x.getIdentifier(), x.getBindingIdentifier()))
+                                 .collect(Collectors.toList());
 
     if (unappliedLambdaBindings.size() == 0) {
       return new ApplyFunctionExpr(expr.function, expr.name, expr.lambdaExpr, newArgs);
@@ -321,11 +353,12 @@ public class Parser
         // cartesian_fold((x, y, acc) -> acc + x + y + z, x, y, acc) =>
         //  cartesian_fold((x, y, z, acc) -> acc + x + y + z, x, y, z, acc)
 
-        final List<Expr> newFoldArgs = new ArrayList<>(expr.argsExpr.size() + unappliedLambdaBindings.size());
+        final List<Expr> newFoldArgs =
+            new ArrayList<>(expr.argsExpr.size() + unappliedLambdaBindings.size());
         final List<IdentifierExpr> newFoldLambdaIdentifiers =
             new ArrayList<>(expr.lambdaExpr.getIdentifiers().size() + unappliedLambdaBindings.size());
         final List<IdentifierExpr> existingFoldLambdaIdentifiers = expr.lambdaExpr.getIdentifierExprs();
-        // accumulator argument is last argument, slice it off when constructing new arg list and lambda args identifiers
+        // accumulator argument is last argument, slice it off when constructing new arg list and lambda args
         for (int i = 0; i < expr.argsExpr.size() - 1; i++) {
           newFoldArgs.add(expr.argsExpr.get(i));
           newFoldLambdaIdentifiers.add(existingFoldLambdaIdentifiers.get(i));
@@ -353,7 +386,7 @@ public class Parser
   public static void validateExpr(Expr expression, Expr.BindingDetails bindingDetails)
   {
     final Set<String> conflicted =
-        Sets.intersection(bindingDetails.getScalarVariables(), bindingDetails.getArrayVariables());
+        Sets.intersection(bindingDetails.getScalarColumns(), bindingDetails.getArrayColumns());
     if (conflicted.size() != 0) {
       throw new RE("Invalid expression: %s; %s used as both scalar and array variables", expression, conflicted);
     }
diff --git a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java
index aa40eb5..f70a716 100644
--- a/core/src/test/java/org/apache/druid/math/expr/ParserTest.java
+++ b/core/src/test/java/org/apache/druid/math/expr/ParserTest.java
@@ -75,6 +75,7 @@ public class ParserTest
     validateParser("x!=y", "(!= x y)", ImmutableList.of("x", "y"));
     validateParser("x && y", "(&& x y)", ImmutableList.of("x", "y"));
     validateParser("x || y", "(|| x y)", ImmutableList.of("x", "y"));
+
   }
 
   @Test
@@ -92,7 +93,7 @@ public class ParserTest
     validateParser("x-y+z", "(+ (- x y) z)", ImmutableList.of("x", "y", "z"));
     validateParser("x-y-z", "(- (- x y) z)", ImmutableList.of("x", "y", "z"));
 
-    validateParser("x-y-x", "(- (- x y) x)", ImmutableList.of("x", "y"));
+    validateParser("x-y-x", "(- (- x y) x)", ImmutableList.of("x", "y"), ImmutableSet.of("x", "x_0", "y"));
   }
 
   @Test
@@ -195,7 +196,7 @@ public class ParserTest
   public void testFunctions()
   {
     validateParser("sqrt(x)", "(sqrt [x])", ImmutableList.of("x"));
-    validateParser("if(cond,then,else)", "(if [cond, then, else])", ImmutableList.of("cond", "then", "else"));
+    validateParser("if(cond,then,else)", "(if [cond, then, else])", ImmutableList.of("else", "then", "cond"));
     validateParser("cast(x, 'STRING')", "(cast [x, STRING])", ImmutableList.of("x"));
     validateParser("cast(x, 'LONG')", "(cast [x, LONG])", ImmutableList.of("x"));
     validateParser("cast(x, 'DOUBLE')", "(cast [x, DOUBLE])", ImmutableList.of("x"));
@@ -275,12 +276,12 @@ public class ParserTest
         "(+ x (map ([x] -> (+ x 1)), [x]))",
         ImmutableList.of("x"),
         ImmutableSet.of("x"),
-        ImmutableSet.of("x")
+        ImmutableSet.of("x_0")
     );
     validateParser(
         "map((x) -> concat(x, y), z)",
         "(map ([x] -> (concat [x, y])), [z])",
-        ImmutableList.of("z", "y"),
+        ImmutableList.of("y", "z"),
         ImmutableSet.of("y"),
         ImmutableSet.of("z")
     );
@@ -303,14 +304,14 @@ public class ParserTest
     validateParser(
         "array_append(z, fold((x, acc) -> acc + x, map((x) -> x + 1, x), y))",
         "(array_append [z, (fold ([x, acc] -> (+ acc x)), [(map ([x] -> (+ x 1)), [x]), y])])",
-        ImmutableList.of("z", "x", "y"),
+        ImmutableList.of("x", "y", "z"),
         ImmutableSet.of(),
         ImmutableSet.of("x", "z")
     );
     validateParser(
         "map(z -> z + 1, array_append(z, fold((x, acc) -> acc + x, map((x) -> x + 1, x), y)))",
         "(map ([z] -> (+ z 1)), [(array_append [z, (fold ([x, acc] -> (+ acc x)), [(map ([x] -> (+ x 1)), [x]), y])])])",
-        ImmutableList.of("z", "x", "y"),
+        ImmutableList.of("x", "y", "z"),
         ImmutableSet.of(),
         ImmutableSet.of("x", "z")
     );
@@ -318,7 +319,7 @@ public class ParserTest
     validateParser(
         "array_append(map(z -> z + 1, array_append(z, fold((x, acc) -> acc + x, map((x) -> x + 1, x), y))), a)",
         "(array_append [(map ([z] -> (+ z 1)), [(array_append [z, (fold ([x, acc] -> (+ acc x)), [(map ([x] -> (+ x 1)), [x]), y])])]), a])",
-        ImmutableList.of("z", "x", "y", "a"),
+        ImmutableList.of("a", "x", "y", "z"),
         ImmutableSet.of("a"),
         ImmutableSet.of("x", "z")
     );
@@ -400,6 +401,48 @@ public class ParserTest
     );
   }
 
+  @Test
+  public void testUniquify()
+  {
+    validateParser("x-x", "(- x x)", ImmutableList.of("x"), ImmutableSet.of("x", "x_0"));
+    validateParser(
+        "x - x + x",
+        "(+ (- x x) x)",
+        ImmutableList.of("x"),
+        ImmutableSet.of("x", "x_0", "x_1")
+    );
+
+    validateParser(
+        "map((x) -> x + x, x)",
+        "(map ([x] -> (+ x x)), [x])",
+        ImmutableList.of("x"),
+        ImmutableSet.of(),
+        ImmutableSet.of("x")
+    );
+
+    validateApplyUnapplied(
+        "x + x",
+        "(+ x x)",
+        "(cartesian_map ([x, x_0] -> (+ x x_0)), [x, x])",
+        ImmutableList.of("x")
+    );
+
+    validateApplyUnapplied(
+        "x + x + x",
+        "(+ (+ x x) x)",
+        "(cartesian_map ([x, x_0, x_1] -> (+ (+ x x_0) x_1)), [x, x, x])",
+        ImmutableList.of("x")
+    );
+
+    // heh
+    validateApplyUnapplied(
+        "x + x + x + y + y + y + y + z + z + z",
+        "(+ (+ (+ (+ (+ (+ (+ (+ (+ x x) x) y) y) y) y) z) z) z)",
+        "(cartesian_map ([x, x_0, x_1, y, y_2, y_3, y_4, z, z_5, z_6] -> (+ (+ (+ (+ (+ (+ (+ (+ (+ x x_0) x_1) y) y_2) y_3) y_4) z) z_5) z_6)), [x, x, x, y, y, y, y, z, z, z])",
+        ImmutableList.of("x", "y", "z")
+    );
+  }
+
 
   private void validateFlatten(String expression, String withoutFlatten, String withFlatten)
   {
@@ -428,7 +471,7 @@ public class ParserTest
     final Expr parsed = Parser.parse(expression, ExprMacroTable.nil());
     final Expr.BindingDetails deets = parsed.analyzeInputs();
     Assert.assertEquals(expression, expected, parsed.toString());
-    Assert.assertEquals(expression, identifiers, deets.getRequiredColumns());
+    Assert.assertEquals(expression, identifiers, deets.getRequiredColumnsList());
     Assert.assertEquals(expression, scalars, deets.getScalarVariables());
     Assert.assertEquals(expression, arrays, deets.getArrayVariables());
   }
diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java
index d20b07a..6995d57 100644
--- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java
+++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java
@@ -121,7 +121,7 @@ public abstract class SimpleDoubleAggregatorFactory extends NullableAggregatorFa
   {
     return fieldName != null
            ? Collections.singletonList(fieldName)
-           : fieldExpression.get().analyzeInputs().getRequiredColumns();
+           : fieldExpression.get().analyzeInputs().getRequiredColumnsList();
   }
 
   @Override
diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java
index 92dbc97..6619126 100644
--- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java
+++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java
@@ -115,7 +115,7 @@ public abstract class SimpleFloatAggregatorFactory extends NullableAggregatorFac
   {
     return fieldName != null
            ? Collections.singletonList(fieldName)
-           : fieldExpression.get().analyzeInputs().getRequiredColumns();
+           : fieldExpression.get().analyzeInputs().getRequiredColumnsList();
   }
 
   @Override
diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java
index 3a77e3c..308100c 100644
--- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java
+++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java
@@ -111,7 +111,7 @@ public abstract class SimpleLongAggregatorFactory extends NullableAggregatorFact
   {
     return fieldName != null
            ? Collections.singletonList(fieldName)
-           : fieldExpression.get().analyzeInputs().getRequiredColumns();
+           : fieldExpression.get().analyzeInputs().getRequiredColumnsList();
   }
 
   @Override
diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/post/ExpressionPostAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/post/ExpressionPostAggregator.java
index 8442195..281356c 100644
--- a/processing/src/main/java/org/apache/druid/query/aggregation/post/ExpressionPostAggregator.java
+++ b/processing/src/main/java/org/apache/druid/query/aggregation/post/ExpressionPostAggregator.java
@@ -118,7 +118,7 @@ public class ExpressionPostAggregator implements PostAggregator
         macroTable,
         finalizers,
         parsed,
-        Suppliers.memoize(() -> parsed.get().analyzeInputs().getFreeVariables()));
+        Suppliers.memoize(() -> parsed.get().analyzeInputs().getRequiredColumns()));
   }
 
   private ExpressionPostAggregator(
diff --git a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java
index e3b49db..c7546a5 100644
--- a/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java
+++ b/processing/src/main/java/org/apache/druid/query/expression/TrimExprMacro.java
@@ -19,15 +19,14 @@
 
 package org.apache.druid.query.expression;
 
+import com.google.common.collect.ImmutableSet;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.math.expr.Expr;
 import org.apache.druid.math.expr.ExprEval;
 import org.apache.druid.math.expr.ExprMacroTable;
 
 import javax.annotation.Nonnull;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 public abstract class TrimExprMacro implements ExprMacroTable.ExprMacro
 {
@@ -239,16 +238,9 @@ public abstract class TrimExprMacro implements ExprMacroTable.ExprMacro
     @Override
     public BindingDetails analyzeInputs()
     {
-      final String stringIdentifier = stringExpr.getIdentifierIfIdentifier();
-      final Set<String> scalars = new HashSet<>();
-      if (stringIdentifier != null) {
-        scalars.add(stringIdentifier);
-      }
-      final String charsIdentifier = charsExpr.getIdentifierIfIdentifier();
-      if (charsIdentifier != null) {
-        scalars.add(charsIdentifier);
-      }
-      return stringExpr.analyzeInputs().merge(charsExpr.analyzeInputs()).mergeWithScalars(scalars);
+      return stringExpr.analyzeInputs()
+                       .with(charsExpr)
+                       .withScalarArguments(ImmutableSet.of(stringExpr, charsExpr));
     }
   }
 
diff --git a/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java
index 4e731e0..0a67652 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java
@@ -77,7 +77,7 @@ public class ExpressionDimFilter implements DimFilter
   @Override
   public HashSet<String> getRequiredColumns()
   {
-    return Sets.newHashSet(parsed.get().analyzeInputs().getFreeVariables());
+    return Sets.newHashSet(parsed.get().analyzeInputs().getRequiredColumns());
   }
 
   @Override
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
index 5816c0c..f46cb8d 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
@@ -48,7 +48,7 @@ public class ExpressionFilter implements Filter
   public ExpressionFilter(final Supplier<Expr> expr)
   {
     this.expr = expr;
-    this.requiredBindings = Suppliers.memoize(() -> expr.get().analyzeInputs().getFreeVariables());
+    this.requiredBindings = Suppliers.memoize(() -> expr.get().analyzeInputs().getRequiredColumns());
   }
 
   @Override
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
index 0ef0b48..edd2aa6 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
@@ -142,7 +142,7 @@ public class ExpressionSelectors
   {
     final Expr.BindingDetails exprDetails = expression.analyzeInputs();
     Parser.validateExpr(expression, exprDetails);
-    final List<String> columns = exprDetails.getRequiredColumns();
+    final List<String> columns = exprDetails.getRequiredColumnsList();
 
     if (columns.size() == 1) {
       final String column = Iterables.getOnlyElement(columns);
@@ -160,7 +160,7 @@ public class ExpressionSelectors
                  && capabilities.isDictionaryEncoded()
                  && capabilities.isComplete()
                  && !capabilities.hasMultipleValues()
-                 && !exprDetails.getArrayVariables().contains(column)) {
+                 && !exprDetails.getArrayColumns().contains(column)) {
         // Optimization for expressions that hit one string column and nothing else.
         return new SingleStringInputCachingExpressionColumnValueSelector(
             columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)),
@@ -176,7 +176,7 @@ public class ExpressionSelectors
 
     final List<String> needsApplied =
         columns.stream()
-               .filter(c -> actualArrays.contains(c) && !exprDetails.getArrayVariables().contains(c))
+               .filter(c -> actualArrays.contains(c) && !exprDetails.getArrayColumns().contains(c))
                .collect(Collectors.toList());
     final Expr finalExpr;
     if (needsApplied.size() > 0) {
@@ -219,7 +219,7 @@ public class ExpressionSelectors
   {
     final Expr.BindingDetails exprDetails = expression.analyzeInputs();
     Parser.validateExpr(expression, exprDetails);
-    final List<String> columns = exprDetails.getRequiredColumns();
+    final List<String> columns = exprDetails.getRequiredColumnsList();
 
 
     if (columns.size() == 1) {
@@ -231,7 +231,7 @@ public class ExpressionSelectors
           && capabilities.isDictionaryEncoded()
           && capabilities.isComplete()
           && !capabilities.hasMultipleValues()
-          && !exprDetails.getArrayVariables().contains(column)
+          && !exprDetails.getArrayColumns().contains(column)
       ) {
         // Optimization for dimension selectors that wrap a single underlying string column.
         return new SingleStringInputDimensionSelector(
@@ -249,7 +249,7 @@ public class ExpressionSelectors
 
     final ColumnValueSelector<ExprEval> baseSelector = makeExprEvalSelector(columnSelectorFactory, expression);
     final boolean multiVal = actualArrays.size() > 0 ||
-                             exprDetails.getArrayVariables().size() > 0 ||
+                             exprDetails.getArrayColumns().size() > 0 ||
                              unknownIfArrays.size() > 0;
 
     if (baseSelector instanceof ConstantExprEvalSelector) {
@@ -355,7 +355,7 @@ public class ExpressionSelectors
   )
   {
     final Map<String, Supplier<Object>> suppliers = new HashMap<>();
-    final List<String> columns = bindingDetails.getRequiredColumns();
+    final List<String> columns = bindingDetails.getRequiredColumnsList();
     for (String columnName : columns) {
       final ColumnCapabilities columnCapabilities = columnSelectorFactory
           .getColumnCapabilities(columnName);
@@ -530,7 +530,7 @@ public class ExpressionSelectors
         } else if (
             !capabilities.isComplete() &&
             capabilities.getType().equals(ValueType.STRING) &&
-            !exprDetails.getArrayVariables().contains(column)
+            !exprDetails.getArrayColumns().contains(column)
         ) {
           unknownIfArrays.add(column);
         }
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index d71ac28..871682d4 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -111,7 +111,7 @@ public class ExpressionVirtualColumn implements VirtualColumn
   @Override
   public List<String> requiredColumns()
   {
-    return parsedExpression.get().analyzeInputs().getRequiredColumns();
+    return parsedExpression.get().analyzeInputs().getRequiredColumnsList();
   }
 
   @Override
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
index e34f26a..719f664 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/RowBasedExpressionColumnValueSelector.java
@@ -53,7 +53,7 @@ public class RowBasedExpressionColumnValueSelector extends ExpressionColumnValue
   {
     super(expression, bindings);
     this.unknownColumns = unknownColumnsSet.stream()
-                                           .filter(x -> !baseExprBindingDetails.getArrayVariables().contains(x))
+                                           .filter(x -> !baseExprBindingDetails.getArrayColumns().contains(x))
                                            .collect(Collectors.toList());
     this.baseExprBindingDetails = baseExprBindingDetails;
     this.ignoredColumns = new HashSet<>();
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java
index af71a99..8fb9cdc 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java
@@ -59,7 +59,7 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
   )
   {
     // Verify expression has just one binding.
-    if (expression.analyzeInputs().getFreeVariables().size() != 1) {
+    if (expression.analyzeInputs().getRequiredColumns().size() != 1) {
       throw new ISE("WTF?! Expected expression with just one binding");
     }
 
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java
index 4d358e0..d9581e7 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java
@@ -54,7 +54,7 @@ public class SingleStringInputCachingExpressionColumnValueSelector implements Co
   )
   {
     // Verify expression has just one binding.
-    if (expression.analyzeInputs().getFreeVariables().size() != 1) {
+    if (expression.analyzeInputs().getRequiredColumns().size() != 1) {
       throw new ISE("WTF?! Expected expression with just one binding");
     }
 
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java
index 275869a..fb372dd 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java
@@ -55,7 +55,7 @@ public class SingleStringInputDimensionSelector implements DimensionSelector
   )
   {
     // Verify expression has just one binding.
-    if (expression.analyzeInputs().getFreeVariables().size() != 1) {
+    if (expression.analyzeInputs().getRequiredColumns().size() != 1) {
       throw new ISE("WTF?! Expected expression with just one binding");
     }
 
diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java
index 4ff31b5..616a49c 100644
--- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java
+++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java
@@ -47,6 +47,8 @@ import org.apache.druid.query.groupby.GroupByQuery;
 import org.apache.druid.query.groupby.GroupByQueryConfig;
 import org.apache.druid.query.groupby.GroupByQueryRunnerTest;
 import org.apache.druid.query.groupby.GroupByQueryRunnerTestHelper;
+import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
+import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
 import org.apache.druid.query.groupby.strategy.GroupByStrategySelector;
 import org.apache.druid.query.spec.LegacySegmentSpec;
 import org.apache.druid.query.topn.TopNQuery;
@@ -572,6 +574,52 @@ public class MultiValuedDimensionTest
   }
 
   @Test
+  public void testGroupByExpressionMultiMultiAutoAutoDupeIdentifier()
+  {
+    if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
+    }
+    GroupByQuery query = GroupByQuery
+        .builder()
+        .setDataSource("xx")
+        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
+        .setGranularity(Granularities.ALL)
+        .setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
+        .setVirtualColumns(
+            new ExpressionVirtualColumn(
+                "texpr",
+                "concat(tags, tags)",
+                ValueType.STRING,
+                TestExprMacroTable.INSTANCE
+            )
+        )
+        .setLimitSpec(new DefaultLimitSpec(ImmutableList.of(new OrderByColumnSpec("count", OrderByColumnSpec.Direction.DESCENDING)), 5))
+        .setAggregatorSpecs(new CountAggregatorFactory("count"))
+        .setContext(context)
+        .build();
+
+    Sequence<Row> result = helper.runQueryOnSegmentsObjs(
+        ImmutableList.of(
+            new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
+            new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
+        ),
+        query
+    );
+
+    List<Row> expectedResults = Arrays.asList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t3t3", "count", 4L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t5t5", "count", 4L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t2t1", "count", 2L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t1t2", "count", 2L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "texpr", "t7t7", "count", 2L)
+    );
+
+    System.out.println(result.toList());
+    TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi-auto-auto-self");
+  }
+
+  @Test
   public void testGroupByExpressionMultiMultiAutoAutoWithFilter()
   {
     if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
index a29cc6e..4e721da 100644
--- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
@@ -588,7 +588,7 @@ public class ExpressionVirtualColumnTest
     Assert.assertEquals(ImmutableList.of("x", "y"), X_PLUS_Y.requiredColumns());
     Assert.assertEquals(ImmutableList.of(), CONSTANT_LIKE.requiredColumns());
     Assert.assertEquals(ImmutableList.of("z"), Z_LIKE.requiredColumns());
-    Assert.assertEquals(ImmutableList.of("z", "x"), Z_CONCAT_X.requiredColumns());
+    Assert.assertEquals(ImmutableList.of("x", "z"), Z_CONCAT_X.requiredColumns());
   }
 
   @Test
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index f28c5b2..7ec8162 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -7954,6 +7954,44 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   }
 
   @Test
+  public void testNvlColumns() throws Exception
+  {
+    testQuery(
+        "SELECT NVL(dim2, dim1), COUNT(*) FROM druid.foo GROUP BY NVL(dim2, dim1)\n",
+        ImmutableList.of(
+            GroupByQuery.builder()
+                        .setDataSource(CalciteTests.DATASOURCE1)
+                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setVirtualColumns(
+                            expressionVirtualColumn(
+                                "v0",
+                                "case_searched(notnull(\"dim2\"),\"dim2\",\"dim1\")",
+                                ValueType.STRING
+                            )
+                        )
+                        .setDimensions(dimensions(new DefaultDimensionSpec("v0", "v0", ValueType.STRING)))
+                        .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
+                        .setContext(QUERY_CONTEXT_DEFAULT)
+                        .build()
+        ),
+        NullHandling.replaceWithDefault() ?
+        ImmutableList.of(
+            new Object[]{"10.1", 1L},
+            new Object[]{"2", 1L},
+            new Object[]{"a", 2L},
+            new Object[]{"abc", 2L}
+        ) :
+        ImmutableList.of(
+            new Object[]{"", 1L},
+            new Object[]{"10.1", 1L},
+            new Object[]{"a", 2L},
+            new Object[]{"abc", 2L}
+        )
+    );
+  }
+
+  @Test
   public void testMultiValueStringWorksLikeStringGroupBy() throws Exception
   {
     List<Object[]> expected;
@@ -8049,14 +8087,14 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
         "SELECT concat(dim3, 'foo') FROM druid.numfoo",
         ImmutableList.of(
             new Druids.ScanQueryBuilder()
-                        .dataSource(CalciteTests.DATASOURCE3)
-                        .intervals(querySegmentSpec(Filtration.eternity()))
-                        .virtualColumns(expressionVirtualColumn("v0", "concat(\"dim3\",'foo')", ValueType.STRING))
-                        .columns(ImmutableList.of("v0"))
-                        .context(QUERY_CONTEXT_DEFAULT)
-                        .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
-                        .legacy(false)
-                        .build()
+                .dataSource(CalciteTests.DATASOURCE3)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(expressionVirtualColumn("v0", "concat(\"dim3\",'foo')", ValueType.STRING))
+                .columns(ImmutableList.of("v0"))
+                .context(QUERY_CONTEXT_DEFAULT)
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .legacy(false)
+                .build()
         ),
         ImmutableList.of(
             new Object[]{"[\"afoo\",\"bfoo\"]"},
@@ -8070,16 +8108,16 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   }
 
   @Test
-  public void testMultiValueStringWorksLikeStringScanWithFilter() throws Exception
+  public void testMultiValueStringWorksLikeStringSelfConcatScan() throws Exception
   {
+    final String nullVal = NullHandling.replaceWithDefault() ? "[\"-lol-\"]" : "[null]";
     testQuery(
-        "SELECT concat(dim3, 'foo') FROM druid.numfoo where concat(dim3, 'foo') = 'bfoo'",
+        "SELECT concat(dim3, '-lol-', dim3) FROM druid.numfoo",
         ImmutableList.of(
             new Druids.ScanQueryBuilder()
                 .dataSource(CalciteTests.DATASOURCE3)
                 .intervals(querySegmentSpec(Filtration.eternity()))
-                .virtualColumns(expressionVirtualColumn("v0", "concat(\"dim3\",'foo')", ValueType.STRING))
-                .filters(selector("v0", "bfoo", null))
+                .virtualColumns(expressionVirtualColumn("v0", "concat(\"dim3\",'-lol-',\"dim3\")", ValueType.STRING))
                 .columns(ImmutableList.of("v0"))
                 .context(QUERY_CONTEXT_DEFAULT)
                 .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@@ -8087,46 +8125,36 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
                 .build()
         ),
         ImmutableList.of(
-            new Object[]{"[\"afoo\",\"bfoo\"]"},
-            new Object[]{"[\"bfoo\",\"cfoo\"]"}
+            new Object[]{"[\"a-lol-a\",\"a-lol-b\",\"b-lol-a\",\"b-lol-b\"]"},
+            new Object[]{"[\"b-lol-b\",\"b-lol-c\",\"c-lol-b\",\"c-lol-c\"]"},
+            new Object[]{"[\"d-lol-d\"]"},
+            new Object[]{"[\"-lol-\"]"},
+            new Object[]{nullVal},
+            new Object[]{nullVal}
         )
     );
   }
 
   @Test
-  public void testNvlColumns() throws Exception
+  public void testMultiValueStringWorksLikeStringScanWithFilter() throws Exception
   {
     testQuery(
-        "SELECT NVL(dim2, dim1), COUNT(*) FROM druid.foo GROUP BY NVL(dim2, dim1)\n",
+        "SELECT concat(dim3, 'foo') FROM druid.numfoo where concat(dim3, 'foo') = 'bfoo'",
         ImmutableList.of(
-            GroupByQuery.builder()
-                        .setDataSource(CalciteTests.DATASOURCE1)
-                        .setInterval(querySegmentSpec(Filtration.eternity()))
-                        .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            expressionVirtualColumn(
-                                "v0",
-                                "case_searched(notnull(\"dim2\"),\"dim2\",\"dim1\")",
-                                ValueType.STRING
-                            )
-                        )
-                        .setDimensions(dimensions(new DefaultDimensionSpec("v0", "v0", ValueType.STRING)))
-                        .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
-                        .setContext(QUERY_CONTEXT_DEFAULT)
-                        .build()
+            new Druids.ScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE3)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(expressionVirtualColumn("v0", "concat(\"dim3\",'foo')", ValueType.STRING))
+                .filters(selector("v0", "bfoo", null))
+                .columns(ImmutableList.of("v0"))
+                .context(QUERY_CONTEXT_DEFAULT)
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .legacy(false)
+                .build()
         ),
-        NullHandling.replaceWithDefault() ?
-        ImmutableList.of(
-            new Object[]{"10.1", 1L},
-            new Object[]{"2", 1L},
-            new Object[]{"a", 2L},
-            new Object[]{"abc", 2L}
-        ) :
         ImmutableList.of(
-            new Object[]{"", 1L},
-            new Object[]{"10.1", 1L},
-            new Object[]{"a", 2L},
-            new Object[]{"abc", 2L}
+            new Object[]{"[\"afoo\",\"bfoo\"]"},
+            new Object[]{"[\"bfoo\",\"cfoo\"]"}
         )
     );
   }
@@ -8270,7 +8298,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   @Test
   public void testMultiValueStringSlice() throws Exception
   {
-    final String nullVal = NullHandling.replaceWithDefault() ? "[\"foo\"]" : "[null]";
     testQuery(
         "SELECT MV_SLICE(dim3, 1) FROM druid.numfoo",
         ImmutableList.of(
@@ -8295,7 +8322,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
     );
   }
 
-
   @Test
   public void testMultiValueStringLength() throws Exception
   {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org