You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2022/04/13 21:49:10 UTC

[pinot] branch master updated: Add customizable parser module (#8484)

This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new cb8bcc4d14 Add customizable parser module (#8484)
cb8bcc4d14 is described below

commit cb8bcc4d14f141e8834feb2b3e08ccba02920a3b
Author: Rong Rong <ro...@apache.org>
AuthorDate: Wed Apr 13 14:49:05 2022 -0700

    Add customizable parser module (#8484)
    
    * adding custome parser
    
    * adding stuff to use custom parser
    
    * adding SqlInsertFromFile SqlCall node. make E2E compilation work
    
    * fix license
    
    * fix compilation and test
    
    * adding in presto driver pom change as well
    
    * also add customizable parser test
    
    * remove the non-used FILE token
    
    * adding TODO for next steps
    
    * move fmpp maven plugin to contrib
    
    * fix format
    
    * add javadoc
    
    Co-authored-by: Rong Rong <ro...@startree.ai>
---
 contrib/pinot-fmpp-maven-plugin/pom.xml            | 111 +++++
 .../main/java/org/apache/pinot/fmpp/FMPPMojo.java  | 270 +++++++++++
 .../org/apache/pinot/fmpp/MavenDataLoader.java     |  55 +++
 headerdefinition.xml                               |  10 +
 pinot-common/pom.xml                               |  87 ++++
 pinot-common/src/main/codegen/config.fmpp          | 540 +++++++++++++++++++++
 .../src/main/codegen/includes/parserImpls.ftl      | 103 ++++
 .../apache/pinot/sql/parsers/CalciteSqlParser.java |  49 +-
 .../sql/parsers/parser/SqlInsertFromFile.java      |  74 +++
 .../pinot/sql/parsers/parser/UnparseUtils.java     |  66 +++
 .../pinot/sql/parsers/CalciteSqlCompilerTest.java  |  61 +--
 .../pinot-common-jdk8/pom.xml                      |  87 ++++
 pom.xml                                            |  25 +
 13 files changed, 1485 insertions(+), 53 deletions(-)

diff --git a/contrib/pinot-fmpp-maven-plugin/pom.xml b/contrib/pinot-fmpp-maven-plugin/pom.xml
new file mode 100644
index 0000000000..d09c5dab67
--- /dev/null
+++ b/contrib/pinot-fmpp-maven-plugin/pom.xml
@@ -0,0 +1,111 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <artifactId>pinot</artifactId>
+    <groupId>org.apache.pinot</groupId>
+    <version>0.11.0-SNAPSHOT</version>
+    <relativePath>../..</relativePath>
+  </parent>
+
+  <artifactId>pinot-fmpp-maven-plugin</artifactId>
+  <name>Pinot FMPP plugin</name>
+  <url>https://pinot.apache.org/</url>
+  <packaging>maven-plugin</packaging>
+  <properties>
+    <!-- NOTE(review): pinot.root is not referenced elsewhere in this POM; presumably read by the parent build. Confirm. -->
+    <pinot.root>${basedir}/../..</pinot.root>
+    <!-- Versions applied to the Maven API, FMPP and FreeMarker dependencies declared below. -->
+    <maven.version>3.3.3</maven.version>
+    <fmpp.version>0.9.16</fmpp.version>
+    <freemarker.version>2.3.28</freemarker.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.maven</groupId>
+      <artifactId>maven-core</artifactId>
+      <version>${maven.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.codehaus.plexus</groupId>
+          <artifactId>plexus-utils</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.maven</groupId>
+      <artifactId>maven-plugin-api</artifactId>
+      <version>${maven.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sourceforge.fmpp</groupId>
+      <artifactId>fmpp</artifactId>
+      <version>${fmpp.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.freemarker</groupId>
+      <artifactId>freemarker</artifactId>
+      <version>${freemarker.version}</version>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <!-- Checkstyle shouldn't apply to Mojo classes -->
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-plugin-plugin</artifactId>
+        <configuration>
+          <goalPrefix>pinot-fmpp</goalPrefix>
+        </configuration>
+        <executions>
+          <execution>
+            <id>default-descriptor</id>
+            <goals>
+              <goal>descriptor</goal>
+            </goals>
+            <phase>process-classes</phase>
+          </execution>
+          <execution>
+            <id>help-descriptor</id>
+            <goals>
+              <goal>helpmojo</goal>
+            </goals>
+            <phase>process-classes</phase>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/FMPPMojo.java b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/FMPPMojo.java
new file mode 100644
index 0000000000..787ac7606c
--- /dev/null
+++ b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/FMPPMojo.java
@@ -0,0 +1,270 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.fmpp;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Stopwatch;
+import fmpp.Engine;
+import fmpp.ProgressListener;
+import fmpp.progresslisteners.TerseConsoleProgressListener;
+import fmpp.setting.Settings;
+import fmpp.util.MiscUtil;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import org.apache.commons.io.FileUtils;
+import org.apache.maven.plugin.AbstractMojo;
+import org.apache.maven.plugin.MojoExecutionException;
+import org.apache.maven.plugin.MojoFailureException;
+import org.apache.maven.project.MavenProject;
+
+import static java.lang.String.format;
+
+
+/**
+ * a maven plugin to run the freemarker generation incrementally
+ * (if output has not changed, the files are not touched)
+ *
+ * @goal generate
+ * @phase generate-sources
+ */
+public class FMPPMojo extends AbstractMojo {
+
+  /**
+   * Used to add new source directories to the build.
+   *
+   * @parameter default-value="${project}"
+   * @required
+   * @readonly
+   **/
+  private MavenProject project;
+
+  /**
+   * Where to find the FreeMarker template files.
+   *
+   * @parameter default-value="src/main/resources/fmpp/templates/"
+   * @required
+   */
+  private File templates;
+
+  /**
+   * Directory where the generated files are written.
+   *
+   * @parameter default-value="${project.build.directory}/generated-sources/fmpp/"
+   * @required
+   */
+  private File output;
+
+  /**
+   * Location of the FreeMarker config file.
+   *
+   * @parameter default-value="src/main/resources/fmpp/config.fmpp"
+   * @required
+   */
+  private File config;
+
+  /**
+   * compilation scope to be added to ("compile" or "test")
+   *
+   * @parameter default-value="compile"
+   * @required
+   */
+  private String scope;
+
+  /**
+   * FMPP data model build parameter.
+   *
+   * @see <a href="http://fmpp.sourceforge.net/settings.html#key_data">FMPP Data Model Building</a>
+   * @parameter default-value=""
+   */
+  private String data;
+
+  /**
+   * if maven properties are added as data
+   *
+   * @parameter default-value="true"
+   * @required
+   */
+  private boolean addMavenDataLoader;
+
+  @Override
+  public void execute()
+      throws MojoExecutionException, MojoFailureException {
+    if (project == null) {
+      throw new MojoExecutionException("This plugin can only be used inside a project.");
+    }
+    String outputPath = output.getAbsolutePath();
+    if ((!output.exists() && !output.mkdirs()) || !output.isDirectory()) {
+      throw new MojoFailureException("can not write to output dir: " + outputPath);
+    }
+    String templatesPath = templates.getAbsolutePath();
+    if (!templates.exists() || !templates.isDirectory()) {
+      throw new MojoFailureException("templates not found in dir: " + outputPath);
+    }
+
+    // add the output directory path to the project source directories
+    switch (scope) {
+      case "compile":
+        project.addCompileSourceRoot(outputPath);
+        break;
+      case "test":
+        project.addTestCompileSourceRoot(outputPath);
+        break;
+      default:
+        throw new MojoFailureException("scope must be compile or test");
+    }
+
+    final Stopwatch sw = Stopwatch.createStarted();
+    try {
+      getLog().info(
+          format("Freemarker generation:\n scope: %s,\n config: %s,\n templates: %s", scope, config.getAbsolutePath(),
+              templatesPath));
+      final File tmp = Files.createTempDirectory("freemarker-tmp").toFile();
+      String tmpPath = tmp.getAbsolutePath();
+      final String tmpPathNormalized = tmpPath.endsWith(File.separator) ? tmpPath : tmpPath + File.separator;
+      Settings settings = new Settings(new File("."));
+      settings.set(Settings.NAME_SOURCE_ROOT, templatesPath);
+      settings.set(Settings.NAME_OUTPUT_ROOT, tmp.getAbsolutePath());
+      settings.load(config);
+      settings.addProgressListener(new TerseConsoleProgressListener());
+      settings.addProgressListener(new ProgressListener() {
+        @Override
+        public void notifyProgressEvent(Engine engine, int event, File src, int pMode, Throwable error, Object param)
+            throws Exception {
+          if (event == EVENT_END_PROCESSING_SESSION) {
+            getLog().info(format("Freemarker generation took %dms", sw.elapsed(TimeUnit.MILLISECONDS)));
+            sw.reset();
+            Report report = moveIfChanged(tmp, tmpPathNormalized);
+            if (!tmp.delete()) {
+              throw new MojoFailureException(format("can not delete %s", tmp));
+            }
+            getLog().info(format("Incremental output update took %dms", sw.elapsed(TimeUnit.MILLISECONDS)));
+            getLog().info(format("new: %d", report.newFiles));
+            getLog().info(format("changed: %d", report.changedFiles));
+            getLog().info(format("unchanged: %d", report.unchangedFiles));
+          }
+        }
+      });
+      List<String> dataValues = new ArrayList<>();
+      if (addMavenDataLoader) {
+        getLog().info("Adding maven data loader");
+        settings.setEngineAttribute(MavenDataLoader.MAVEN_DATA_ATTRIBUTE, new MavenDataLoader.MavenData(project));
+        dataValues.add(format("maven: %s()", MavenDataLoader.class.getName()));
+      }
+      if (data != null) {
+        dataValues.add(data);
+      }
+      if (!dataValues.isEmpty()) {
+        String dataString = Joiner.on(",").join(dataValues);
+        getLog().info("Setting data loader " + dataString);
+
+        settings.add(Settings.NAME_DATA, dataString);
+      }
+      settings.execute();
+    } catch (Exception e) {
+      throw new MojoFailureException(MiscUtil.causeMessages(e), e);
+    }
+  }
+
+  /**
+   * Tally of generated files grouped by outcome: newly created, changed, or unchanged.
+   */
+  private static final class Report {
+    int changedFiles;
+    int unchangedFiles;
+    int newFiles;
+
+    /** Creates a report with every counter at zero. */
+    public Report() {
+      this(0, 0, 0);
+    }
+
+    /** Creates a report seeded with the given counts. */
+    Report(int changed, int unchanged, int created) {
+      changedFiles = changed;
+      unchangedFiles = unchanged;
+      newFiles = created;
+    }
+
+    /** Counts one more file that did not exist in the output before. */
+    public void addNew() {
+      newFiles += 1;
+    }
+
+    /** Counts one more file whose content differs from the existing output. */
+    public void addChanged() {
+      changedFiles += 1;
+    }
+
+    /** Counts one more file whose content matches the existing output. */
+    public void addUnchanged() {
+      unchangedFiles += 1;
+    }
+
+    /** Adds each counter of {@code other} onto this report. */
+    void add(Report other) {
+      newFiles = newFiles + other.newFiles;
+      changedFiles = changedFiles + other.changedFiles;
+      unchangedFiles = unchangedFiles + other.unchangedFiles;
+    }
+  }
+
+  /**
+   * Recursively moves generated files from the temporary tree into {@code output}, skipping unchanged ones.
+   *
+   * <p>Every regular file under {@code root} is either moved to its corresponding location under
+   * {@code output} (when that location is missing or holds different content) or deleted (when the content is
+   * identical). Sub-directories of {@code root} are deleted once processed; {@code root} itself is left for the
+   * caller to delete.
+   *
+   * @param root directory of the temporary tree to process
+   * @param tmpPath absolute path prefix of the temporary tree; it is stripped from each file's absolute path to
+   *     obtain the path relative to {@code output}, so it is expected to end with a file separator
+   * @return counts of new, changed and unchanged files found under {@code root}
+   * @throws MojoFailureException if a directory cannot be listed, or a file or directory cannot be deleted or
+   *     created
+   * @throws IOException if comparing or moving a file fails
+   */
+  private Report moveIfChanged(File root, String tmpPath)
+      throws MojoFailureException, IOException {
+    // listFiles() returns null when root is not a directory or an I/O error occurs; fail clearly instead of NPE.
+    File[] children = root.listFiles();
+    if (children == null) {
+      throw new MojoFailureException(format("can not list files in %s", root));
+    }
+    Report report = new Report();
+    for (File file : children) {
+      if (file.isDirectory()) {
+        report.add(moveIfChanged(file, tmpPath));
+        if (!file.delete()) {
+          throw new MojoFailureException(format("can not delete %s", file));
+        }
+      } else {
+        String absPath = file.getAbsolutePath();
+        if (!absPath.startsWith(tmpPath)) {
+          throw new MojoFailureException(format("%s should start with %s", absPath, tmpPath));
+        }
+        String relPath = absPath.substring(tmpPath.length());
+        File outputFile = new File(output, relPath);
+        if (!outputFile.exists()) {
+          report.addNew();
+        } else if (!FileUtils.contentEquals(file, outputFile)) {
+          getLog().info(format("%s has changed", relPath));
+          // Remove the stale copy so the move below can put the fresh file in its place.
+          if (!outputFile.delete()) {
+            throw new MojoFailureException(format("can not delete %s", outputFile));
+          }
+          report.addChanged();
+        } else {
+          report.addUnchanged();
+        }
+        // At this point the output file is absent for both the "new" and the "changed" case.
+        if (!outputFile.exists()) {
+          File parentDir = outputFile.getParentFile();
+          if (parentDir.exists() && !parentDir.isDirectory()) {
+            throw new MojoFailureException(
+                format("can not move %s to %s as %s is not a dir", file, outputFile, parentDir));
+          }
+          if (!parentDir.exists() && !parentDir.mkdirs()) {
+            throw new MojoFailureException(
+                format("can not move %s to %s as dir %s can not be created", file, outputFile, parentDir));
+          }
+          FileUtils.moveFile(file, outputFile);
+        } else {
+          // Unchanged: keep the existing output file and discard the temporary copy.
+          if (!file.delete()) {
+            throw new MojoFailureException(format("can not delete %s", file));
+          }
+        }
+      }
+    }
+    return report;
+  }
+}
diff --git a/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/MavenDataLoader.java b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/MavenDataLoader.java
new file mode 100644
index 0000000000..df85ad891b
--- /dev/null
+++ b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/MavenDataLoader.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.fmpp;
+
+import fmpp.Engine;
+import fmpp.tdd.DataLoader;
+import java.util.List;
+import org.apache.maven.project.MavenProject;
+
+
+/**
+ * FMPP {@link DataLoader} that returns the {@link MavenData} registered as an attribute on the engine.
+ */
+public class MavenDataLoader implements DataLoader {
+  /** Name of the engine attribute under which the {@link MavenData} instance is looked up. */
+  public static final String MAVEN_DATA_ATTRIBUTE = "maven.data";
+
+  /**
+   * Immutable holder wrapping a {@link MavenProject}.
+   */
+  public static final class MavenData {
+    private final MavenProject project;
+
+    public MavenData(MavenProject mavenProject) {
+      project = mavenProject;
+    }
+
+    public MavenProject getProject() {
+      return project;
+    }
+  }
+
+  /**
+   * Looks up the {@link MavenData} stored on the engine under {@link #MAVEN_DATA_ATTRIBUTE}.
+   *
+   * @param e engine whose attribute is read
+   * @param args loader arguments; must be empty
+   * @return the attribute value cast to {@link MavenData}
+   * @throws IllegalArgumentException if any argument is supplied
+   */
+  @Override
+  public Object load(Engine e, List args)
+      throws Exception {
+    if (args.isEmpty()) {
+      return (MavenData) e.getAttribute(MAVEN_DATA_ATTRIBUTE);
+    }
+    throw new IllegalArgumentException("maven model data loader has no parameters");
+  }
+}
diff --git a/headerdefinition.xml b/headerdefinition.xml
index f3fd4287ed..34d178e654 100644
--- a/headerdefinition.xml
+++ b/headerdefinition.xml
@@ -40,4 +40,14 @@
     <isMultiline>true</isMultiline>
     <padLines>false</padLines>
   </javadoc_style>
+  <!-- Header style for FreeMarker (.ftl) templates. The detection patterns must match the FTL comment
+       delimiters declared in firstLine/endLine, otherwise an existing header is not recognised. -->
+  <ftl_style>
+    <firstLine>&lt;#--</firstLine>
+    <beforeEachLine>// </beforeEachLine>
+    <endLine>--&gt;</endLine>
+    <firstLineDetectionPattern>(\s|\t)*&lt;#--.*$</firstLineDetectionPattern>
+    <lastLineDetectionPattern>.*--&gt;(\s|\t)*$</lastLineDetectionPattern>
+    <allowBlankLines>false</allowBlankLines>
+    <isMultiline>true</isMultiline>
+    <padLines>false</padLines>
+  </ftl_style>
 </additionalHeaders>
diff --git a/pinot-common/pom.xml b/pinot-common/pom.xml
index 76bd905535..2bb5b8127b 100644
--- a/pinot-common/pom.xml
+++ b/pinot-common/pom.xml
@@ -98,6 +98,93 @@
           </java>
         </configuration>
       </plugin>
+      <plugin>
+        <!-- Extract parser grammar template from calcite-core.jar and put
+             it under ${project.build.directory} where all freemarker templates are. -->
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>unpack-parser-template</id>
+            <phase>initialize</phase>
+            <goals>
+              <goal>unpack</goal>
+            </goals>
+            <configuration>
+              <artifactItems>
+                <artifactItem>
+                  <groupId>org.apache.calcite</groupId>
+                  <artifactId>calcite-core</artifactId>
+                  <type>jar</type>
+                  <overWrite>true</overWrite>
+                  <outputDirectory>${project.build.directory}/</outputDirectory>
+                  <includes>**/Parser.jj,**/default_config.fmpp</includes>
+                </artifactItem>
+              </artifactItems>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <!-- Render the FreeMarker templates under ${project.build.directory}/codegen/templates into
+             generated-sources/fmpp. The data model is built from this module's config.fmpp, with
+             default_config.fmpp exposed under the name "default". -->
+        <groupId>org.apache.pinot</groupId>
+        <artifactId>pinot-fmpp-maven-plugin</artifactId>
+        <version>${project.version}</version>
+        <executions>
+          <execution>
+            <id>generate-fmpp-sources</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>generate</goal>
+            </goals>
+            <configuration>
+              <config>${project.basedir}/src/main/codegen/config.fmpp</config>
+              <output>${project.build.directory}/generated-sources/fmpp</output>
+              <templates>${project.build.directory}/codegen/templates</templates>
+              <data>tdd(${project.basedir}/src/main/codegen/config.fmpp), default:tdd(${project.build.directory}/codegen/default_config.fmpp)</data>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <!-- Register the JavaCC output directory as an additional source root. -->
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>add-generated-sources</id>
+            <phase>process-sources</phase>
+            <goals>
+              <goal>add-source</goal>
+            </goals>
+            <configuration>
+              <sources>
+                <source>${project.build.directory}/generated-sources/javacc</source>
+              </sources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <!-- Run JavaCC on the Parser.jj found under generated-sources/fmpp and write the generated
+             parser to generated-sources/javacc.
+             NOTE(review): this is bound to the same phase as the FMPP execution above and needs its
+             output; presumably the declaration order guarantees FMPP runs first. Confirm. -->
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>javacc-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>generate-sources</phase>
+            <id>javacc</id>
+            <goals>
+              <goal>javacc</goal>
+            </goals>
+            <configuration>
+              <sourceDirectory>${project.build.directory}/generated-sources/fmpp</sourceDirectory>
+              <includes>
+                <include>**/Parser.jj</include>
+              </includes>
+              <lookAhead>2</lookAhead>
+              <isStatic>false</isStatic>
+              <outputDirectory>${project.build.directory}/generated-sources/javacc</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
   <dependencies>
diff --git a/pinot-common/src/main/codegen/config.fmpp b/pinot-common/src/main/codegen/config.fmpp
new file mode 100644
index 0000000000..c83241a6a1
--- /dev/null
+++ b/pinot-common/src/main/codegen/config.fmpp
@@ -0,0 +1,540 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+data: {
+  # Data declarations for this parser.
+  #
+  # Default declarations are in default_config.fmpp; if you do not include a
+  # declaration ('imports' or 'nonReservedKeywords', for example) in this file,
+  # FMPP will use the declaration from default_config.fmpp.
+  parser: {
+    # Generated parser implementation class package and name
+    package: "org.apache.pinot.sql.parsers.parser",
+    class: "SqlParserImpl",
+
+    # List of import statements.
+    imports: [
+      "com.google.common.collect.*"
+      "org.apache.pinot.sql.parsers.parser.*"
+      "java.util.*"
+    ]
+
+    # List of new keywords to add
+    keywords: [
+      "FILE"
+      "ARCHIVE"
+    ]
+
+    # List of non-reserved keywords to add
+    nonReservedKeywordsToAdd: [
+      # customized for Pinot
+      "FILE"
+      "ARCHIVE"
+
+      # The following keywords are reserved in core Calcite,
+      # are reserved in some version of SQL,
+      # but are not reserved in Babel.
+      #
+      # Words that are commented out (e.g. "AND") are still reserved.
+      # These are the most important reserved words, and SQL cannot be
+      # unambiguously parsed if they are not reserved. For example, if
+      # "INNER" is not reserved then in the query
+      #
+      #   select * from emp inner join dept using (deptno)
+      #
+      # "inner" could be a table alias for "emp".
+      #
+      # TODO: remove unused/untested BABEL non-reserved keywords since
+      #       we are only testing a small subset.
+      #       @see: CalciteSqlCompilerTest:testReservedKeywords
+      "ABS"
+      "ABSOLUTE"
+      "ACTION"
+      "ADD"
+      "AFTER"
+      "ALL"
+      "ALLOCATE"
+      "ALLOW"
+      "ALTER"
+      "AND"
+#     "ANY"
+      "ARE"
+      "ARRAY"
+#     "ARRAY_AGG" # not a keyword in Calcite
+      "ARRAY_MAX_CARDINALITY"
+      "AS"
+      "ASC"
+      "ASENSITIVE"
+      "ASSERTION"
+      "ASYMMETRIC"
+      "AT"
+      "ATOMIC"
+      "AUTHORIZATION"
+      "AVG"
+      "BEFORE"
+      "BEGIN"
+      "BEGIN_FRAME"
+      "BEGIN_PARTITION"
+      "BETWEEN"
+      "BIGINT"
+      "BINARY"
+      "BIT"
+#     "BIT_LENGTH" # not a keyword in Calcite
+      "BLOB"
+      "BOOLEAN"
+      "BOTH"
+      "BREADTH"
+      "BY"
+#     "CALL"
+      "CALLED"
+      "CARDINALITY"
+      "CASCADE"
+      "CASCADED"
+#     "CASE"
+      "CAST"
+      "CATALOG"
+      "CEIL"
+      "CEILING"
+      "CHAR"
+      "CHARACTER"
+      "CHARACTER_LENGTH"
+      "CHAR_LENGTH"
+      "CHECK"
+      "CLASSIFIER"
+      "CLOB"
+      "CLOSE"
+      "COALESCE"
+      "COLLATE"
+      "COLLATION"
+      "COLLECT"
+      "COLUMN"
+      "COMMIT"
+      "CONDITION"
+      "CONNECT"
+      "CONNECTION"
+      "CONSTRAINT"
+      "CONSTRAINTS"
+      "CONSTRUCTOR"
+      "CONTAINS"
+      "CONTINUE"
+      "CONVERT"
+      "CORR"
+      "CORRESPONDING"
+      "COUNT"
+      "COVAR_POP"
+      "COVAR_SAMP"
+#     "CREATE"
+#     "CROSS"
+      "CUBE"
+      "CUME_DIST"
+#     "CURRENT"
+      "CURRENT_CATALOG"
+      "CURRENT_DATE"
+      "CURRENT_DEFAULT_TRANSFORM_GROUP"
+      "CURRENT_PATH"
+      "CURRENT_ROLE"
+      "CURRENT_ROW"
+      "CURRENT_SCHEMA"
+      "CURRENT_TIME"
+      "CURRENT_TIMESTAMP"
+      "CURRENT_TRANSFORM_GROUP_FOR_TYPE"
+      "CURRENT_USER"
+#     "CURSOR"
+      "CYCLE"
+      "DATA"
+#     "DATE"
+      "DAY"
+      "DEALLOCATE"
+      "DEC"
+      "DECIMAL"
+      "DECLARE"
+#     "DEFAULT"
+      "DEFERRABLE"
+      "DEFERRED"
+#     "DEFINE"
+#     "DELETE"
+      "DENSE_RANK"
+      "DEPTH"
+      "DEREF"
+      "DESC"
+#     "DESCRIBE" # must be reserved
+      "DESCRIPTOR"
+      "DETERMINISTIC"
+      "DIAGNOSTICS"
+      "DISALLOW"
+      "DISCONNECT"
+#     "DISTINCT"
+#     "DO"  # not a keyword in Calcite
+      "DOMAIN"
+      "DOUBLE"
+#     "DROP" # probably must be reserved
+      "DYNAMIC"
+      "EACH"
+      "ELEMENT"
+      "ELSE"
+#     "ELSEIF" # not a keyword in Calcite
+      "EMPTY"
+      "END"
+#     "END-EXEC" # not a keyword in Calcite, and contains '-'
+      "END_FRAME"
+      "END_PARTITION"
+      "EQUALS"
+      "ESCAPE"
+      "EVERY"
+#     "EXCEPT" # must be reserved
+      "EXCEPTION"
+      "EXEC"
+      "EXECUTE"
+      "EXISTS"
+#     "EXIT" # not a keyword in Calcite
+      "EXP"
+#     "EXPLAIN" # must be reserved
+      "EXTEND"
+      "EXTERNAL"
+      "EXTRACT"
+      "FALSE"
+#     "FETCH"
+      "FILTER"
+      "FIRST"
+      "FIRST_VALUE"
+      "FLOAT"
+      "FLOOR"
+      "FOR"
+      "FOREIGN"
+#     "FOREVER" # not a keyword in Calcite
+      "FOUND"
+      "FRAME_ROW"
+      "FREE"
+#     "FROM" # must be reserved
+#     "FULL" # must be reserved
+      "FUNCTION"
+      "FUSION"
+      "GENERAL"
+      "GET"
+      "GLOBAL"
+      "GO"
+      "GOTO"
+#     "GRANT"
+#     "GROUP"
+#     "GROUPING"
+      "GROUPS"
+#     "HANDLER" # not a keyword in Calcite
+#     "HAVING"
+      "HOLD"
+      "HOUR"
+      "IDENTITY"
+#     "IF" # not a keyword in Calcite
+      "ILIKE"
+      "IMMEDIATE"
+      "IMMEDIATELY"
+      "IMPORT"
+#     "IN"
+      "INDICATOR"
+      "INITIAL"
+      "INITIALLY"
+#     "INNER"
+      "INOUT"
+      "INPUT"
+      "INSENSITIVE"
+#     "INSERT"
+      "INT"
+      "INTEGER"
+#     "INTERSECT"
+      "INTERSECTION"
+#     "INTERVAL"
+#     "INTO"
+      "IS"
+      "ISOLATION"
+#     "ITERATE" # not a keyword in Calcite
+#     "JOIN"
+      "JSON_ARRAY"
+      "JSON_ARRAYAGG"
+      "JSON_EXISTS"
+      "JSON_OBJECT"
+      "JSON_OBJECTAGG"
+      "JSON_QUERY"
+      "JSON_VALUE"
+#     "KEEP" # not a keyword in Calcite
+      "KEY"
+      "LAG"
+      "LANGUAGE"
+      "LARGE"
+      "LAST"
+      "LAST_VALUE"
+#     "LATERAL"
+      "LEAD"
+      "LEADING"
+#     "LEAVE" # not a keyword in Calcite
+#     "LEFT"
+      "LEVEL"
+      "LIKE"
+      "LIKE_REGEX"
+#     "LIMIT"
+      "LN"
+      "LOCAL"
+      "LOCALTIME"
+      "LOCALTIMESTAMP"
+      "LOCATOR"
+#     "LOOP" # not a keyword in Calcite
+      "LOWER"
+      "MAP"
+      "MATCH"
+      "MATCHES"
+      "MATCH_NUMBER"
+#     "MATCH_RECOGNIZE"
+      "MAX"
+#     "MAX_CARDINALITY" # not a keyword in Calcite
+      "MEASURES"
+      "MEMBER"
+#     "MERGE"
+      "METHOD"
+      "MIN"
+#     "MINUS"
+      "MINUTE"
+      "MOD"
+      "MODIFIES"
+      "MODULE"
+      "MONTH"
+      "MULTISET"
+      "NAMES"
+      "NATIONAL"
+#     "NATURAL"
+      "NCHAR"
+      "NCLOB"
+#     "NEW"
+#     "NEXT"
+      "NO"
+      "NONE"
+      "NORMALIZE"
+      "NOT"
+      "NTH_VALUE"
+      "NTILE"
+#     "NULL"
+      "NULLIF"
+      "NUMERIC"
+      "OBJECT"
+      "OCCURRENCES_REGEX"
+      "OCTET_LENGTH"
+      "OF"
+#     "OFFSET"
+      "OLD"
+      "OMIT"
+#     "ON"
+      "ONE"
+      "ONLY"
+      "OPEN"
+      "OPTION"
+      "OR"
+#     "ORDER"
+      "ORDINALITY"
+      "OUT"
+#     "OUTER"
+      "OUTPUT"
+#     "OVER"
+      "OVERLAPS"
+      "OVERLAY"
+      "PAD"
+      "PARAMETER"
+      "PARTIAL"
+#     "PARTITION"
+      "PATH"
+#     "PATTERN"
+      "PER"
+      "PERCENT"
+      "PERCENTILE_CONT"
+      "PERCENTILE_DISC"
+      "PERCENT_RANK"
+      "PERIOD"
+      "PERMUTE"
+      "PORTION"
+      "POSITION"
+      "POSITION_REGEX"
+      "POWER"
+      "PRECEDES"
+      "PRECISION"
+      "PREPARE"
+      "PRESERVE"
+      "PREV"
+      "PRIMARY"
+      "PRIOR"
+      "PRIVILEGES"
+      "PROCEDURE"
+      "PUBLIC"
+#     "RANGE"
+      "RANK"
+      "READ"
+      "READS"
+      "REAL"
+      "RECURSIVE"
+      "REF"
+      "REFERENCES"
+      "REFERENCING"
+      "REGR_AVGX"
+      "REGR_AVGY"
+      "REGR_COUNT"
+      "REGR_INTERCEPT"
+      "REGR_R2"
+      "REGR_SLOPE"
+      "REGR_SXX"
+      "REGR_SXY"
+      "REGR_SYY"
+      "RELATIVE"
+      "RELEASE"
+#     "REPEAT" # not a keyword in Calcite
+      "RESET"
+#     "RESIGNAL" # not a keyword in Calcite
+      "RESTRICT"
+      "RESULT"
+      "RETURN"
+      "RETURNS"
+      "REVOKE"
+#     "RIGHT"
+      "RLIKE"
+      "ROLE"
+      "ROLLBACK"
+#     "ROLLUP"
+      "ROUTINE"
+#     "ROW"
+#     "ROWS"
+      "ROW_NUMBER"
+      "RUNNING"
+      "SAVEPOINT"
+      "SCHEMA"
+      "SCOPE"
+      "SCROLL"
+      "SEARCH"
+      "SECOND"
+      "SECTION"
+      "SEEK"
+#     "SELECT"
+      "SENSITIVE"
+      "SESSION"
+      "SESSION_USER"
+#     "SET"
+#     "SETS"
+      "SHOW"
+#     "SIGNAL" # not a keyword in Calcite
+      "SIMILAR"
+      "SIZE"
+#     "SKIP" # messes with JavaCC's <SKIP> token
+      "SMALLINT"
+#     "SOME"
+      "SPACE"
+      "SPECIFIC"
+      "SPECIFICTYPE"
+      "SQL"
+#     "SQLCODE" # not a keyword in Calcite
+#     "SQLERROR" # not a keyword in Calcite
+      "SQLEXCEPTION"
+      "SQLSTATE"
+      "SQLWARNING"
+      "SQRT"
+      "START"
+      "STATE"
+      "STATIC"
+      "STDDEV_POP"
+      "STDDEV_SAMP"
+#     "STREAM"
+      "SUBMULTISET"
+      "SUBSET"
+      "SUBSTRING"
+      "SUBSTRING_REGEX"
+      "SUCCEEDS"
+      "SUM"
+      "SYMMETRIC"
+      "SYSTEM"
+      "SYSTEM_TIME"
+      "SYSTEM_USER"
+#     "TABLE"
+#     "TABLESAMPLE"
+      "TEMPORARY"
+#     "THEN"
+#     "TIME"
+#     "TIMESTAMP"
+      "TIMEZONE_HOUR"
+      "TIMEZONE_MINUTE"
+      "TINYINT"
+      "TO"
+      "TRAILING"
+      "TRANSACTION"
+      "TRANSLATE"
+      "TRANSLATE_REGEX"
+      "TRANSLATION"
+      "TREAT"
+      "TRIGGER"
+      "TRIM"
+      "TRIM_ARRAY"
+      "TRUE"
+      "TRUNCATE"
+      "UESCAPE"
+      "UNDER"
+#     "UNDO" # not a keyword in Calcite
+#     "UNION"
+      "UNIQUE"
+      "UNKNOWN"
+#     "UNNEST"
+#     "UNTIL" # not a keyword in Calcite
+#     "UPDATE"
+      "UPPER"
+      "UPSERT"
+      "USAGE"
+      "USER"
+#     "USING"
+      "VALUE"
+#     "VALUES"
+      "VALUE_OF"
+      "VARBINARY"
+      "VARCHAR"
+      "VARYING"
+      "VAR_POP"
+      "VAR_SAMP"
+      "VERSION"
+      "VERSIONING"
+#     "VERSIONS" # not a keyword in Calcite
+      "VIEW"
+#     "WHEN"
+      "WHENEVER"
+#     "WHERE"
+#     "WHILE" # not a keyword in Calcite
+      "WIDTH_BUCKET"
+#     "WINDOW"
+#     "WITH"
+      "WITHIN"
+      "WITHOUT"
+      "WORK"
+      "WRITE"
+      "YEAR"
+      "ZONE"
+    ]
+
+    # List of extended statement syntax to add
+    statementParserMethods: [
+      "SqlInsertFromFile()"
+    ]
+
+    # List of files in the @includes directory that contain parser method implementations
+    implementationFiles: [
+      "parserImpls.ftl"
+    ],
+  }
+}
+
+freemarkerLinks: {
+  includes: includes/
+}
diff --git a/pinot-common/src/main/codegen/includes/parserImpls.ftl b/pinot-common/src/main/codegen/includes/parserImpls.ftl
new file mode 100644
index 0000000000..57d3e2b8af
--- /dev/null
+++ b/pinot-common/src/main/codegen/includes/parserImpls.ftl
@@ -0,0 +1,103 @@
+<#--
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+-->
+
+/** Parses one ( FILE | ARCHIVE ) 'file_uri' entry and appends the URI string literal to the given list. */
+private void DataFileDef(List<SqlNode> list) :
+{
+    SqlNode uri;
+}
+{
+    ( <FILE> | <ARCHIVE> )
+    uri = StringLiteral()
+    {
+        list.add(uri);
+    }
+}
+
+/** Parses FROM followed by a comma-separated list of data file definitions; returns them as one node list. */
+SqlNodeList DataFileDefList() :
+{
+    SqlParserPos pos;
+    List<SqlNode> list = Lists.newArrayList();
+}
+{
+    <FROM> { pos = getPos(); }
+    DataFileDef(list)
+    ( <COMMA> DataFileDef(list) )*
+    {
+        return new SqlNodeList(list, pos.plus(getPos()));
+    }
+}
+
+/**
+ * Parses: INSERT INTO [db_name.]table_name
+ *   [ FROM ( FILE | ARCHIVE ) 'file_uri' [, ( FILE | ARCHIVE ) 'file_uri' ]* ]
+ */
+SqlInsertFromFile SqlInsertFromFile() :
+{
+    SqlParserPos pos;
+    SqlIdentifier dbName = null;
+    SqlIdentifier tableName;
+    SqlNodeList fileList = null;
+}
+{
+    <INSERT> { pos = getPos(); }
+    <INTO>
+    [
+        dbName = SimpleIdentifier()
+        <DOT>
+    ]
+
+    tableName = SimpleIdentifier()
+    [
+        fileList = DataFileDefList()
+    ]
+    {
+        return new SqlInsertFromFile(pos, dbName, tableName, fileList);
+    }
+}
+
+/* Parses a single SQL statement and appends it to the given statement list;
+ * used by SqlStmtsEof() for every statement that follows a ';' separator. */
+private void SqlStatementList(SqlNodeList list) :
+{
+}
+{
+    {
+        list.add(SqlStmt());
+    }
+}
+
+/** Parses one or more ';'-separated SQL statements, with an optional trailing ';', up to end of input. */
+SqlNodeList SqlStmtsEof() :
+{
+    final SqlNodeList stmts = new SqlNodeList(getPos());
+}
+{
+    {
+        stmts.add(SqlStmt());
+    }
+    // Keep looping only while ';' is followed by the start of another statement,
+    // so that a lone trailing ';' falls through to the optional match below.
+    ( LOOKAHEAD(2, <SEMICOLON> SqlStmt()) <SEMICOLON> SqlStatementList(stmts) )*
+    [ <SEMICOLON> ] <EOF>
+    {
+        return stmts;
+    }
+}
diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
index 98c1e5fefa..1a1a1ac46e 100644
--- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.sql.parsers;
 
 import com.google.common.annotations.VisibleForTesting;
+import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -29,7 +30,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import org.apache.calcite.config.Lex;
+import org.apache.calcite.avatica.util.Casing;
 import org.apache.calcite.sql.SqlBasicCall;
 import org.apache.calcite.sql.SqlDataTypeSpec;
 import org.apache.calcite.sql.SqlExplain;
@@ -45,9 +46,8 @@ import org.apache.calcite.sql.SqlSelectKeyword;
 import org.apache.calcite.sql.fun.SqlBetweenOperator;
 import org.apache.calcite.sql.fun.SqlCase;
 import org.apache.calcite.sql.fun.SqlLikeOperator;
-import org.apache.calcite.sql.parser.SqlParseException;
+import org.apache.calcite.sql.parser.SqlAbstractParserImpl;
 import org.apache.calcite.sql.parser.SqlParser;
-import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl;
 import org.apache.calcite.sql.validate.SqlConformanceEnum;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.pinot.common.request.DataSource;
@@ -60,6 +60,7 @@ import org.apache.pinot.common.utils.request.RequestUtils;
 import org.apache.pinot.pql.parsers.pql2.ast.FilterKind;
 import org.apache.pinot.segment.spi.AggregationFunctionType;
 import org.apache.pinot.spi.utils.Pairs;
+import org.apache.pinot.sql.parsers.parser.SqlParserImpl;
 import org.apache.pinot.sql.parsers.rewriter.QueryRewriter;
 import org.apache.pinot.sql.parsers.rewriter.QueryRewriterFactory;
 import org.slf4j.Logger;
@@ -73,18 +74,6 @@ public class CalciteSqlParser {
   public static final List<QueryRewriter> QUERY_REWRITERS = new ArrayList<>(QueryRewriterFactory.getQueryRewriters());
   private static final Logger LOGGER = LoggerFactory.getLogger(CalciteSqlParser.class);
 
-  /** Lexical policy similar to MySQL with ANSI_QUOTES option enabled. (To be
-   * precise: MySQL on Windows; MySQL on Linux uses case-sensitive matching,
-   * like the Linux file system.) The case of identifiers is preserved whether
-   * or not they quoted; after which, identifiers are matched
-   * case-insensitively. Double quotes allow identifiers to contain
-   * non-alphanumeric characters. */
-  private static final Lex PINOT_LEX = Lex.MYSQL_ANSI;
-
-  // BABEL is a very liberal conformance value that allows anything supported by any dialect
-  private static final SqlParser.Config PARSER_CONFIG =
-      SqlParser.configBuilder().setLex(PINOT_LEX).setConformance(SqlConformanceEnum.BABEL)
-          .setParserFactory(SqlBabelParserImpl.FACTORY).build();
   // To Keep the backward compatibility with 'OPTION' Functionality in PQL, which is used to
   // provide more hints for query processing.
   //
@@ -95,6 +84,7 @@ public class CalciteSqlParser {
   //   `OPTION (<k1> = <v1>, <k2> = <v2>, <k3> = <v3>)`
   // or
   //   `OPTION (<k1> = <v1>) OPTION (<k2> = <v2>) OPTION (<k3> = <v3>)`
+  // TODO: move to use parser syntax extension: `OPTION` `(` `<key>` = `<value>` [, `<key>` = `<value>`]* `)`
   private static final Pattern OPTIONS_REGEX_PATTEN =
       Pattern.compile("option\\s*\\(([^\\)]+)\\)", Pattern.CASE_INSENSITIVE);
 
@@ -131,11 +121,11 @@ public class CalciteSqlParser {
       sql = removeOptionsFromSql(sql);
     }
 
-    SqlParser sqlParser = SqlParser.create(sql, PARSER_CONFIG);
     SqlNode sqlNode;
-    try {
-      sqlNode = sqlParser.parseQuery();
-    } catch (SqlParseException e) {
+    try (StringReader inStream = new StringReader(sql)) {
+      SqlParserImpl sqlParser = newSqlParser(inStream);
+      sqlNode = sqlParser.parseSqlStmtEof();
+    } catch (Throwable e) {
       throw new SqlCompilationException("Caught exception while parsing query: " + sql, e);
     }
 
@@ -311,16 +301,29 @@ public class CalciteSqlParser {
    * @throws SqlCompilationException if String is not a valid expression.
    */
   public static Expression compileToExpression(String expression) {
-    SqlParser sqlParser = SqlParser.create(expression, PARSER_CONFIG);
     SqlNode sqlNode;
-    try {
-      sqlNode = sqlParser.parseExpression();
-    } catch (SqlParseException e) {
+    try (StringReader inStream = new StringReader(expression)) {
+      SqlParserImpl sqlParser = newSqlParser(inStream);
+      sqlNode = sqlParser.parseSqlExpressionEof();
+    } catch (Throwable e) {
       throw new SqlCompilationException("Caught exception while parsing expression: " + expression, e);
     }
     return toExpression(sqlNode);
   }
 
+  @VisibleForTesting
+  static SqlParserImpl newSqlParser(StringReader inStream) {
+    SqlParserImpl parser = new SqlParserImpl(inStream); // generated parser reading directly from the given reader
+    parser.switchTo(SqlAbstractParserImpl.LexicalState.DQID);
+    // TODO: switch to MySQL conformance once most of the untested BABEL tokens have been retired
+    parser.setConformance(SqlConformanceEnum.BABEL);
+    parser.setTabSize(1);
+    parser.setQuotedCasing(Casing.UNCHANGED); // identifier case is kept as written, quoted or not
+    parser.setUnquotedCasing(Casing.UNCHANGED);
+    parser.setIdentifierMaxLength(SqlParser.DEFAULT_IDENTIFIER_MAX_LENGTH);
+    return parser;
+  }
+
   private static void setOptions(PinotQuery pinotQuery, List<String> optionsStatements) {
     if (optionsStatements.isEmpty()) {
       return;
diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/SqlInsertFromFile.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/SqlInsertFromFile.java
new file mode 100644
index 0000000000..3b2df6f778
--- /dev/null
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/SqlInsertFromFile.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.sql.parsers.parser;
+
+import java.util.Arrays;
+import java.util.List;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlIdentifier;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlNodeList;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.SqlSpecialOperator;
+import org.apache.calcite.sql.SqlWriter;
+import org.apache.calcite.sql.parser.SqlParserPos;
+
+
+/**
+ * Calcite {@link SqlCall} extension for an INSERT statement that names its source data files by URI.
+ *
+ * <p>Syntax: INSERT INTO [db_name.]table_name [FROM (FILE | ARCHIVE) 'file_uri' [, (FILE | ARCHIVE) 'file_uri']*]
+ */
+public class SqlInsertFromFile extends SqlCall {
+  private static final SqlSpecialOperator OPERATOR = new SqlSpecialOperator("UDF", SqlKind.OTHER_DDL);
+  private final SqlIdentifier _dbName; // null when the table name is not qualified with a database
+  private final SqlIdentifier _tableName;
+  private final SqlNodeList _fileList; // null when the statement has no FROM clause
+
+  public SqlInsertFromFile(SqlParserPos pos, SqlIdentifier dbName, SqlIdentifier tableName, SqlNodeList fileList) {
+    super(pos);
+    _dbName = dbName;
+    _tableName = tableName;
+    _fileList = fileList;
+  }
+
+  @Override
+  public void unparse(SqlWriter writer, int leftPrec, int rightPrec) {
+    UnparseUtils u = new UnparseUtils(writer, leftPrec, rightPrec);
+    u.keyword("INSERT", "INTO");
+    if (_dbName != null) {
+      u.node(_dbName).keyword(".");
+    }
+    u.node(_tableName);
+    if (_fileList != null) {
+      u.keyword("FROM").nodeList(_fileList); // NOTE(review): emits a parenthesized list without FILE/ARCHIVE
+    }
+  }
+
+  @Override
+  public SqlOperator getOperator() {
+    return OPERATOR;
+  }
+
+  @Override
+  public List<SqlNode> getOperandList() {
+    return Arrays.asList(_dbName, _tableName, _fileList); // entries are null for clauses that were absent
+  }
+}
diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/UnparseUtils.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/UnparseUtils.java
new file mode 100644
index 0000000000..4d85337c73
--- /dev/null
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/UnparseUtils.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.sql.parsers.parser;
+
+import java.util.Arrays;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlNodeList;
+import org.apache.calcite.sql.SqlWriter;
+
+
+/**
+ * Fluent helper used while unparsing a node: emits keywords, a {@link SqlNode} or a {@link SqlNodeList} through the
+ * {@link SqlWriter} and the precedences supplied at construction time.
+ *
+ * @see SqlNode#unparse(SqlWriter, int, int)
+ */
+class UnparseUtils {
+  private final SqlWriter _writer;
+  private final int _leftPrec;
+  private final int _rightPrec;
+
+  UnparseUtils(SqlWriter writer, int leftPrec, int rightPrec) {
+    _writer = writer;
+    _leftPrec = leftPrec;
+    _rightPrec = rightPrec;
+  }
+
+  UnparseUtils keyword(String... keywords) {
+    Arrays.asList(keywords).forEach(_writer::keyword);
+    return this;
+  }
+
+  UnparseUtils node(SqlNode n) {
+    n.unparse(_writer, _leftPrec, _rightPrec);
+    return this;
+  }
+
+  UnparseUtils nodeList(SqlNodeList l) {
+    _writer.keyword("(");
+    for (int i = 0; i < l.size(); i++) {
+      // Every element after the first is preceded by a comma separator.
+      if (i > 0) {
+        _writer.keyword(",");
+      }
+      l.get(i).unparse(_writer, _leftPrec, _rightPrec);
+    }
+    _writer.keyword(")");
+    return this;
+  }
+}
diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
index ae0eae0e19..83cde06edc 100644
--- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
@@ -18,14 +18,15 @@
  */
 package org.apache.pinot.sql.parsers;
 
+import java.io.StringReader;
 import java.time.Instant;
 import java.time.ZoneId;
 import java.time.format.DateTimeFormatter;
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
+import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlNumericLiteral;
-import org.apache.calcite.sql.parser.SqlParseException;
 import org.apache.pinot.common.request.AggregationInfo;
 import org.apache.pinot.common.request.BrokerRequest;
 import org.apache.pinot.common.request.Expression;
@@ -40,6 +41,8 @@ import org.apache.pinot.common.utils.request.RequestUtils;
 import org.apache.pinot.pql.parsers.PinotQuery2BrokerRequestConverter;
 import org.apache.pinot.pql.parsers.pql2.ast.FilterKind;
 import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.sql.parsers.parser.SqlInsertFromFile;
+import org.apache.pinot.sql.parsers.parser.SqlParserImpl;
 import org.apache.pinot.sql.parsers.rewriter.CompileTimeFunctionsInvoker;
 import org.testng.Assert;
 import org.testng.annotations.Test;
@@ -445,9 +448,8 @@ public class CalciteSqlCompilerTest {
     literal = pinotQuery.getSelectList().get(0).getLiteral();
     Assert.assertNull(literal);
 
-    pinotQuery = CalciteSqlParser
-        .compileToPinotQuery("select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), "
-            + "decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from mytable");
+    pinotQuery = CalciteSqlParser.compileToPinotQuery("select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), "
+        + "decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from mytable");
     Literal literal1 = pinotQuery.getSelectList().get(0).getLiteral();
     Literal literal2 = pinotQuery.getSelectList().get(1).getLiteral();
     Assert.assertNotNull(literal1);
@@ -460,8 +462,8 @@ public class CalciteSqlCompilerTest {
     Assert.assertEquals(tempBrokerRequest.getSelections().getSelectionColumns().get(1),
         String.format("'%s'", literal2.getFieldValue().toString()));
 
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT count(*) from mytable "
-        + "where bar = encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')");
+    pinotQuery = CalciteSqlParser.compileToPinotQuery(
+        "SELECT count(*) from mytable " + "where bar = encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')");
     literal = pinotQuery.getSelectList().get(0).getLiteral();
     Assert.assertNull(literal);
 
@@ -1003,8 +1005,6 @@ public class CalciteSqlCompilerTest {
       Assert.fail("Query should have failed compilation");
     } catch (Exception e) {
       Assert.assertTrue(e instanceof SqlCompilationException);
-      Assert.assertTrue(e.getCause() instanceof SqlParseException);
-      Assert.assertTrue(e.getCause().getMessage().contains("Encountered \", DISTINCT\" at line 1, column 15."));
     }
 
     // not supported by Calcite SQL (this is in compliance with SQL standard)
@@ -1014,8 +1014,6 @@ public class CalciteSqlCompilerTest {
       Assert.fail("Query should have failed compilation");
     } catch (Exception e) {
       Assert.assertTrue(e instanceof SqlCompilationException);
-      Assert.assertTrue(e.getCause() instanceof SqlParseException);
-      Assert.assertTrue(e.getCause().getMessage().contains("Encountered \", DISTINCT\" at line 1, column 10."));
     }
 
     // not supported by Calcite SQL (this is in compliance with SQL standard)
@@ -1025,8 +1023,6 @@ public class CalciteSqlCompilerTest {
       Assert.fail("Query should have failed compilation");
     } catch (Exception e) {
       Assert.assertTrue(e instanceof SqlCompilationException);
-      Assert.assertTrue(e.getCause() instanceof SqlParseException);
-      Assert.assertTrue(e.getCause().getMessage().contains("Encountered \", DISTINCT\" at line 1, column 18."));
     }
 
     // The following query although a valid SQL syntax is not
@@ -1559,9 +1555,7 @@ public class CalciteSqlCompilerTest {
       Assert.fail("Query should have failed to compile");
     } catch (Exception e) {
       Assert.assertTrue(e instanceof SqlCompilationException);
-      Assert.assertTrue(e.getCause() instanceof SqlParseException);
       String message = e.getCause().getMessage();
-      Assert.assertTrue(message.startsWith("Encountered") && message.contains("table"));
     }
     // date - need to escape
     try {
@@ -1569,9 +1563,6 @@ public class CalciteSqlCompilerTest {
       Assert.fail("Query should have failed to compile");
     } catch (Exception e) {
       Assert.assertTrue(e instanceof SqlCompilationException);
-      Assert.assertTrue(e.getCause() instanceof SqlParseException);
-      String message = e.getCause().getMessage();
-      Assert.assertTrue(message.startsWith("Encountered") && message.contains("Date"));
     }
 
     // timestamp - need to escape
@@ -1580,9 +1571,6 @@ public class CalciteSqlCompilerTest {
       Assert.fail("Query should have failed to compile");
     } catch (Exception e) {
       Assert.assertTrue(e instanceof SqlCompilationException);
-      Assert.assertTrue(e.getCause() instanceof SqlParseException);
-      String message = e.getCause().getMessage();
-      Assert.assertTrue(message.startsWith("Encountered") && message.contains("timestamp"));
     }
 
     // time - need to escape
@@ -1591,9 +1579,6 @@ public class CalciteSqlCompilerTest {
       Assert.fail("Query should have failed to compile");
     } catch (Exception e) {
       Assert.assertTrue(e instanceof SqlCompilationException);
-      Assert.assertTrue(e.getCause() instanceof SqlParseException);
-      String message = e.getCause().getMessage();
-      Assert.assertTrue(message.startsWith("Encountered") && message.contains("time"));
     }
 
     // group - need to escape
@@ -1602,9 +1587,6 @@ public class CalciteSqlCompilerTest {
       Assert.fail("Query should have failed to compile");
     } catch (Exception e) {
       Assert.assertTrue(e instanceof SqlCompilationException);
-      Assert.assertTrue(e.getCause() instanceof SqlParseException);
-      String message = e.getCause().getMessage();
-      Assert.assertTrue(message.startsWith("Encountered") && message.contains("group"));
     }
 
     // escaping the above works
@@ -1998,15 +1980,14 @@ public class CalciteSqlCompilerTest {
     Assert.assertEquals(expression.getLiteral().getFieldValue(),
         "key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253");
 
-    expression = CalciteSqlParser
-        .compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')");
+    expression =
+        CalciteSqlParser.compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')");
     Assert.assertNotNull(expression.getFunctionCall());
     pinotQuery.setFilterExpression(expression);
     pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery);
     expression = pinotQuery.getFilterExpression();
     Assert.assertNotNull(expression.getLiteral());
-    Assert.assertEquals(expression.getLiteral().getFieldValue(),
-        "key1=value 1&key2=value@!$2&key3=value%3");
+    Assert.assertEquals(expression.getLiteral().getFieldValue(), "key1=value 1&key2=value@!$2&key3=value%3");
 
     expression = CalciteSqlParser.compileToExpression("reverse(playerName)");
     Assert.assertNotNull(expression.getFunctionCall());
@@ -2656,4 +2637,24 @@ public class CalciteSqlCompilerTest {
     Assert.expectThrows(SqlCompilationException.class,
         () -> CalciteSqlParser.compileToPinotQuery("SELECT UPPER(col1), avg(col2) from foo"));
   }
+
+  /**
+   * Verifies that the custom grammar in src/main/codegen/includes/parserImpls.ftl yields a SqlInsertFromFile node.
+   */
+  @Test
+  public void testParserExtensionImpl() {
+    String customSql = "INSERT INTO db.tbl FROM FILE 'file:///tmp/file1', FILE 'file:///tmp/file2'";
+    SqlNode sqlNode = testSqlWithCustomSqlParser(customSql);
+    Assert.assertTrue(sqlNode instanceof SqlInsertFromFile);
+  }
+
+  private static SqlNode testSqlWithCustomSqlParser(String sqlString) {
+    try (StringReader reader = new StringReader(sqlString)) {
+      SqlParserImpl parser = CalciteSqlParser.newSqlParser(reader);
+      return parser.parseSqlStmtEof();
+    } catch (Exception parseFailure) {
+      Assert.fail("test custom sql parser failed", parseFailure);
+    }
+    return null; // only reached if Assert.fail returns normally, which is not expected
+  }
 }
diff --git a/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml b/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml
index abba9e2dc2..095cfca22c 100644
--- a/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml
+++ b/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml
@@ -86,6 +86,93 @@
           </java>
         </configuration>
       </plugin>
+      <plugin>
+        <!-- Extract parser grammar template from calcite-core.jar and put
+             it under ${project.build.directory} where all freemarker templates are. -->
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>unpack-parser-template</id>
+            <phase>initialize</phase>
+            <goals>
+              <goal>unpack</goal>
+            </goals>
+            <configuration>
+              <artifactItems>
+                <artifactItem>
+                  <groupId>org.apache.calcite</groupId>
+                  <artifactId>calcite-core</artifactId>
+                  <type>jar</type>
+                  <overWrite>true</overWrite>
+                  <outputDirectory>${project.build.directory}/</outputDirectory>
+                  <includes>**/Parser.jj,**/default_config.fmpp</includes>
+                </artifactItem>
+              </artifactItems>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.pinot</groupId>
+        <artifactId>pinot-fmpp-maven-plugin</artifactId>
+        <version>${project.version}</version>
+        <executions>
+          <execution>
+            <id>generate-fmpp-sources</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>generate</goal>
+            </goals>
+            <configuration>
+              <config>${project.basedir}/src/main/codegen/config.fmpp</config>
+              <output>${project.build.directory}/generated-sources/fmpp</output>
+              <templates>${project.build.directory}/codegen/templates</templates>
+              <data>tdd(${project.basedir}/src/main/codegen/config.fmpp), default:tdd(${project.build.directory}/codegen/default_config.fmpp)</data>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>add-generated-sources</id>
+            <phase>process-sources</phase>
+            <goals>
+              <goal>add-source</goal>
+            </goals>
+            <configuration>
+              <sources>
+                <source>${project.build.directory}/generated-sources/javacc</source>
+              </sources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>javacc-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>generate-sources</phase>
+            <id>javacc</id>
+            <goals>
+              <goal>javacc</goal>
+            </goals>
+            <configuration>
+              <sourceDirectory>${project.build.directory}/generated-sources/fmpp</sourceDirectory>
+              <includes>
+                <include>**/Parser.jj</include>
+              </includes>
+              <lookAhead>2</lookAhead>
+              <isStatic>false</isStatic>
+              <outputDirectory>${project.build.directory}/generated-sources/javacc</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
   <dependencies>
diff --git a/pom.xml b/pom.xml
index 7ff2080020..6065d06353 100644
--- a/pom.xml
+++ b/pom.xml
@@ -56,6 +56,7 @@
     <module>pinot-connectors</module>
     <module>pinot-segment-local</module>
     <module>pinot-compatibility-verifier</module>
+    <module>contrib/pinot-fmpp-maven-plugin</module>
   </modules>
 
   <licenses>
@@ -1662,6 +1663,8 @@
             <config>SCRIPT_STYLE</config>
             <queries>SCRIPT_STYLE</queries>
             <results>SCRIPT_STYLE</results>
+            <fmpp>SCRIPT_STYLE</fmpp>
+            <ftl>FTL_STYLE</ftl>
             <MockMaker>SCRIPT_STYLE</MockMaker>
             <appAssemblerScriptTemplate>SCRIPT_STYLE</appAssemblerScriptTemplate>
             <pql>SCRIPT_STYLE</pql>
@@ -1824,6 +1827,28 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <version>3.3.0</version>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>javacc-maven-plugin</artifactId>
+        <version>2.6</version>
+        <dependencies>
+          <dependency>
+            <groupId>net.java.dev.javacc</groupId>
+            <artifactId>javacc</artifactId>
+            <version>7.0.10</version>
+          </dependency>
+        </dependencies>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-plugin-plugin</artifactId>
+        <version>3.6.0</version>
+      </plugin>
     </plugins>
   </build>
   <reporting>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org