You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2022/04/13 21:49:10 UTC
[pinot] branch master updated: Add customizable parser module (#8484)
This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new cb8bcc4d14 Add customizable parser module (#8484)
cb8bcc4d14 is described below
commit cb8bcc4d14f141e8834feb2b3e08ccba02920a3b
Author: Rong Rong <ro...@apache.org>
AuthorDate: Wed Apr 13 14:49:05 2022 -0700
Add customizable parser module (#8484)
* adding custome parser
* adding stuff to use custom parser
* adding SqlInsertFromFile SqlCall node. make E2E compilation work
* fix license
* fix compilation and test
* adding in presto driver pom change as well
* also add customizable parser test
* remove the non-used FILE token
* adding TODO for next steps
* move fmpp maven plugin to contrib
* fix format
* add javadoc
Co-authored-by: Rong Rong <ro...@startree.ai>
---
contrib/pinot-fmpp-maven-plugin/pom.xml | 111 +++++
.../main/java/org/apache/pinot/fmpp/FMPPMojo.java | 270 +++++++++++
.../org/apache/pinot/fmpp/MavenDataLoader.java | 55 +++
headerdefinition.xml | 10 +
pinot-common/pom.xml | 87 ++++
pinot-common/src/main/codegen/config.fmpp | 540 +++++++++++++++++++++
.../src/main/codegen/includes/parserImpls.ftl | 103 ++++
.../apache/pinot/sql/parsers/CalciteSqlParser.java | 49 +-
.../sql/parsers/parser/SqlInsertFromFile.java | 74 +++
.../pinot/sql/parsers/parser/UnparseUtils.java | 66 +++
.../pinot/sql/parsers/CalciteSqlCompilerTest.java | 61 +--
.../pinot-common-jdk8/pom.xml | 87 ++++
pom.xml | 25 +
13 files changed, 1485 insertions(+), 53 deletions(-)
diff --git a/contrib/pinot-fmpp-maven-plugin/pom.xml b/contrib/pinot-fmpp-maven-plugin/pom.xml
new file mode 100644
index 0000000000..d09c5dab67
--- /dev/null
+++ b/contrib/pinot-fmpp-maven-plugin/pom.xml
@@ -0,0 +1,111 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>pinot</artifactId>
+ <groupId>org.apache.pinot</groupId>
+ <version>0.11.0-SNAPSHOT</version>
+ <relativePath>../..</relativePath>
+ </parent>
+
+ <artifactId>pinot-fmpp-maven-plugin</artifactId>
+ <name>Pinot FMPP plugin</name>
+ <url>https://pinot.apache.org/</url>
+ <packaging>maven-plugin</packaging>
+ <properties>
+ <pinot.root>${basedir}/../..</pinot.root>
+ <maven.version>3.3.3</maven.version>
+ <fmpp.version>0.9.16</fmpp.version>
+ <freemarker.version>2.3.28</freemarker.version>
+ </properties>
+
+ <dependencies>
+ <!-- Libraries the FMPP Mojo compiles against. -->
+ <!-- commons-io: FileUtils is used by FMPPMojo to compare and move generated files.
+ No version is given here; presumably it is managed by the parent POM (TODO confirm). -->
+ <!-- NOTE(review): FMPPMojo also imports com.google.common.base (Guava), but no Guava
+ dependency is declared in this list; presumably it arrives transitively. Confirm, or declare it. -->
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <!-- maven-core: supplies org.apache.maven.project.MavenProject used by the Mojo.
+ NOTE(review): the reason for excluding plexus-utils is not visible here; confirm it is still needed. -->
+ <dependency>
+ <groupId>org.apache.maven</groupId>
+ <artifactId>maven-core</artifactId>
+ <version>${maven.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.codehaus.plexus</groupId>
+ <artifactId>plexus-utils</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <!-- maven-plugin-api: supplies AbstractMojo and the Mojo exception types. -->
+ <dependency>
+ <groupId>org.apache.maven</groupId>
+ <artifactId>maven-plugin-api</artifactId>
+ <version>${maven.version}</version>
+ </dependency>
+ <!-- fmpp: the FreeMarker preprocessor driven by the Mojo (fmpp.Engine, fmpp.setting.Settings). -->
+ <dependency>
+ <groupId>net.sourceforge.fmpp</groupId>
+ <artifactId>fmpp</artifactId>
+ <version>${fmpp.version}</version>
+ </dependency>
+ <!-- freemarker: pinned explicitly via freemarker.version instead of relying on whatever
+ version FMPP would bring in. -->
+ <dependency>
+ <groupId>org.freemarker</groupId>
+ <artifactId>freemarker</artifactId>
+ <version>${freemarker.version}</version>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <!-- Checkstyle shouldn't apply to Mojo classes -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
+ <plugin>
+ <!-- Generates the Maven plugin descriptor and help Mojo for this module. -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-plugin-plugin</artifactId>
+ <configuration>
+ <!-- Prefix under which the goals of this plugin are addressed, e.g. pinot-fmpp:generate. -->
+ <goalPrefix>pinot-fmpp</goalPrefix>
+ </configuration>
+ <executions>
+ <!-- Both executions are bound to process-classes, i.e. they run after compilation. -->
+ <execution>
+ <id>default-descriptor</id>
+ <goals>
+ <goal>descriptor</goal>
+ </goals>
+ <phase>process-classes</phase>
+ </execution>
+ <execution>
+ <id>help-descriptor</id>
+ <goals>
+ <goal>helpmojo</goal>
+ </goals>
+ <phase>process-classes</phase>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/FMPPMojo.java b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/FMPPMojo.java
new file mode 100644
index 0000000000..787ac7606c
--- /dev/null
+++ b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/FMPPMojo.java
@@ -0,0 +1,270 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.fmpp;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Stopwatch;
+import fmpp.Engine;
+import fmpp.ProgressListener;
+import fmpp.progresslisteners.TerseConsoleProgressListener;
+import fmpp.setting.Settings;
+import fmpp.util.MiscUtil;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import org.apache.commons.io.FileUtils;
+import org.apache.maven.plugin.AbstractMojo;
+import org.apache.maven.plugin.MojoExecutionException;
+import org.apache.maven.plugin.MojoFailureException;
+import org.apache.maven.project.MavenProject;
+
+import static java.lang.String.format;
+
+
+/**
+ * a maven plugin to run the freemarker generation incrementally
+ * (if output has not changed, the files are not touched)
+ *
+ * @goal generate
+ * @phase generate-sources
+ */
+public class FMPPMojo extends AbstractMojo {
+
+  /**
+   * Used to add new source directories to the build.
+   *
+   * @parameter default-value="${project}"
+   * @required
+   * @readonly
+   **/
+  private MavenProject project;
+
+  /**
+   * Where to find the FreeMarker template files.
+   *
+   * @parameter default-value="src/main/resources/fmpp/templates/"
+   * @required
+   */
+  private File templates;
+
+  /**
+   * Where to write the generated files.
+   *
+   * @parameter default-value="${project.build.directory}/generated-sources/fmpp/"
+   * @required
+   */
+  private File output;
+
+  /**
+   * Location of the FreeMarker config file.
+   *
+   * @parameter default-value="src/main/resources/fmpp/config.fmpp"
+   * @required
+   */
+  private File config;
+
+  /**
+   * compilation scope to be added to ("compile" or "test")
+   *
+   * @parameter default-value="compile"
+   * @required
+   */
+  private String scope;
+
+  /**
+   * FMPP data model build parameter.
+   *
+   * @see <a href="http://fmpp.sourceforge.net/settings.html#key_data">FMPP Data Model Building</a>
+   * @parameter default-value=""
+   */
+  private String data;
+
+  /**
+   * if maven properties are added as data
+   *
+   * @parameter default-value="true"
+   * @required
+   */
+  private boolean addMavenDataLoader;
+
+  /**
+   * Runs FMPP over {@link #templates} into a temporary directory and then moves only new or
+   * changed files into {@link #output}; files whose content is unchanged are left untouched.
+   *
+   * <p>Before generation starts, {@link #output} is registered as a compile or test source root
+   * according to {@link #scope}.
+   *
+   * @throws MojoExecutionException if the plugin runs without a Maven project
+   * @throws MojoFailureException if the output or templates directory is unusable, the scope is
+   *     neither "compile" nor "test", or FMPP processing fails
+   */
+  @Override
+  public void execute()
+      throws MojoExecutionException, MojoFailureException {
+    if (project == null) {
+      throw new MojoExecutionException("This plugin can only be used inside a project.");
+    }
+    String outputPath = output.getAbsolutePath();
+    if ((!output.exists() && !output.mkdirs()) || !output.isDirectory()) {
+      throw new MojoFailureException("can not write to output dir: " + outputPath);
+    }
+    String templatesPath = templates.getAbsolutePath();
+    if (!templates.exists() || !templates.isDirectory()) {
+      // Report the templates directory itself (the original message printed the output path).
+      throw new MojoFailureException("templates not found in dir: " + templatesPath);
+    }
+
+    // add the output directory path to the project source directories
+    switch (scope) {
+      case "compile":
+        project.addCompileSourceRoot(outputPath);
+        break;
+      case "test":
+        project.addTestCompileSourceRoot(outputPath);
+        break;
+      default:
+        throw new MojoFailureException("scope must be compile or test");
+    }
+
+    final Stopwatch sw = Stopwatch.createStarted();
+    // Remembered outside the try block so the temp directory can be cleaned up on failure.
+    File tmpDir = null;
+    try {
+      getLog().info(
+          format("Freemarker generation:\n scope: %s,\n config: %s,\n templates: %s", scope, config.getAbsolutePath(),
+              templatesPath));
+      final File tmp = Files.createTempDirectory("freemarker-tmp").toFile();
+      tmpDir = tmp;
+      String tmpPath = tmp.getAbsolutePath();
+      // Guarantee a trailing separator so relative paths can be derived with a plain substring.
+      final String tmpPathNormalized = tmpPath.endsWith(File.separator) ? tmpPath : tmpPath + File.separator;
+      Settings settings = new Settings(new File("."));
+      settings.set(Settings.NAME_SOURCE_ROOT, templatesPath);
+      settings.set(Settings.NAME_OUTPUT_ROOT, tmpPath);
+      settings.load(config);
+      settings.addProgressListener(new TerseConsoleProgressListener());
+      settings.addProgressListener(new ProgressListener() {
+        @Override
+        public void notifyProgressEvent(Engine engine, int event, File src, int pMode, Throwable error, Object param)
+            throws Exception {
+          if (event == EVENT_END_PROCESSING_SESSION) {
+            getLog().info(format("Freemarker generation took %dms", sw.elapsed(TimeUnit.MILLISECONDS)));
+            // reset() alone leaves the stopwatch stopped at zero; restart it so the incremental
+            // update below is actually timed.
+            sw.reset().start();
+            Report report = moveIfChanged(tmp, tmpPathNormalized);
+            if (!tmp.delete()) {
+              throw new MojoFailureException(format("can not delete %s", tmp));
+            }
+            getLog().info(format("Incremental output update took %dms", sw.elapsed(TimeUnit.MILLISECONDS)));
+            getLog().info(format("new: %d", report.newFiles));
+            getLog().info(format("changed: %d", report.changedFiles));
+            getLog().info(format("unchanged: %d", report.unchangedFiles));
+          }
+        }
+      });
+      List<String> dataValues = new ArrayList<>();
+      if (addMavenDataLoader) {
+        getLog().info("Adding maven data loader");
+        settings.setEngineAttribute(MavenDataLoader.MAVEN_DATA_ATTRIBUTE, new MavenDataLoader.MavenData(project));
+        dataValues.add(format("maven: %s()", MavenDataLoader.class.getName()));
+      }
+      if (data != null) {
+        dataValues.add(data);
+      }
+      if (!dataValues.isEmpty()) {
+        String dataString = Joiner.on(",").join(dataValues);
+        getLog().info("Setting data loader " + dataString);
+
+        settings.add(Settings.NAME_DATA, dataString);
+      }
+      settings.execute();
+    } catch (Exception e) {
+      throw new MojoFailureException(MiscUtil.causeMessages(e), e);
+    } finally {
+      // On success the listener has already removed the directory and this is a no-op; on
+      // failure it keeps "freemarker-tmp*" directories from piling up. Null-safe.
+      FileUtils.deleteQuietly(tmpDir);
+    }
+  }
+
+  /** Counters describing what an incremental output update did. */
+  private static final class Report {
+    int changedFiles;
+    int unchangedFiles;
+    int newFiles;
+
+    Report(int changedFiles, int unchangedFiles, int newFiles) {
+      super();
+      this.changedFiles = changedFiles;
+      this.unchangedFiles = unchangedFiles;
+      this.newFiles = newFiles;
+    }
+
+    public Report() {
+      this(0, 0, 0);
+    }
+
+    /** Adds the counters of {@code other} to this report. */
+    void add(Report other) {
+      changedFiles += other.changedFiles;
+      unchangedFiles += other.unchangedFiles;
+      newFiles += other.newFiles;
+    }
+
+    public void addChanged() {
+      ++changedFiles;
+    }
+
+    public void addNew() {
+      ++newFiles;
+    }
+
+    public void addUnchanged() {
+      ++unchangedFiles;
+    }
+  }
+
+  /**
+   * Recursively empties {@code root}: each file is moved to the same relative location under
+   * {@link #output} when it is new or its content differs, and simply deleted when the existing
+   * output file has identical content. Sub-directories are deleted once they have been processed.
+   *
+   * @param root directory (inside the temporary output tree) to process
+   * @param tmpPath absolute path of the temporary output root, ending with a file separator
+   * @return counters of new, changed and unchanged files found below {@code root}
+   * @throws MojoFailureException if a directory can not be listed or created, or a file or
+   *     directory can not be deleted
+   * @throws IOException if comparing or moving a file fails
+   */
+  private Report moveIfChanged(File root, String tmpPath)
+      throws MojoFailureException, IOException {
+    Report report = new Report();
+    // listFiles() returns null when root is not a directory or an I/O error occurs.
+    File[] children = root.listFiles();
+    if (children == null) {
+      throw new MojoFailureException(format("can not list files in %s", root));
+    }
+    for (File file : children) {
+      if (file.isDirectory()) {
+        report.add(moveIfChanged(file, tmpPath));
+        if (!file.delete()) {
+          throw new MojoFailureException(format("can not delete %s", file));
+        }
+      } else {
+        String absPath = file.getAbsolutePath();
+        if (!absPath.startsWith(tmpPath)) {
+          throw new MojoFailureException(format("%s should start with %s", absPath, tmpPath));
+        }
+        String relPath = absPath.substring(tmpPath.length());
+        File outputFile = new File(output, relPath);
+        if (!outputFile.exists()) {
+          report.addNew();
+        } else if (!FileUtils.contentEquals(file, outputFile)) {
+          getLog().info(format("%s has changed", relPath));
+          // Remove the stale copy so the move below can put the new version in place.
+          if (!outputFile.delete()) {
+            throw new MojoFailureException(format("can not delete %s", outputFile));
+          }
+          report.addChanged();
+        } else {
+          report.addUnchanged();
+        }
+        if (!outputFile.exists()) {
+          File parentDir = outputFile.getParentFile();
+          if (parentDir.exists() && !parentDir.isDirectory()) {
+            throw new MojoFailureException(
+                format("can not move %s to %s as %s is not a dir", file, outputFile, parentDir));
+          }
+          if (!parentDir.exists() && !parentDir.mkdirs()) {
+            throw new MojoFailureException(
+                format("can not move %s to %s as dir %s can not be created", file, outputFile, parentDir));
+          }
+          FileUtils.moveFile(file, outputFile);
+        } else {
+          // Unchanged: keep the existing output file and discard the freshly generated copy.
+          if (!file.delete()) {
+            throw new MojoFailureException(format("can not delete %s", file));
+          }
+        }
+      }
+    }
+    return report;
+  }
+}
diff --git a/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/MavenDataLoader.java b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/MavenDataLoader.java
new file mode 100644
index 0000000000..df85ad891b
--- /dev/null
+++ b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/MavenDataLoader.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.fmpp;
+
+import fmpp.Engine;
+import fmpp.tdd.DataLoader;
+import java.util.List;
+import org.apache.maven.project.MavenProject;
+
+
+/**
+ * A data loader for Maven
+ */
+public class MavenDataLoader implements DataLoader {
+  /** Name of the engine attribute under which the {@link MavenData} instance is stored. */
+  public static final String MAVEN_DATA_ATTRIBUTE = "maven.data";
+
+  /** Immutable wrapper around the current {@link MavenProject}. */
+  public static final class MavenData {
+    private final MavenProject project;
+
+    public MavenData(MavenProject project) {
+      this.project = project;
+    }
+
+    public MavenProject getProject() {
+      return project;
+    }
+  }
+
+  /**
+   * Returns the {@link MavenData} stored on the engine under {@link #MAVEN_DATA_ATTRIBUTE}.
+   *
+   * @param e engine whose attribute is read
+   * @param args loader arguments; must be empty
+   * @return the stored {@link MavenData}, or {@code null} if the attribute is not set
+   * @throws IllegalArgumentException if any argument is supplied
+   */
+  @Override
+  public Object load(Engine e, List args)
+      throws Exception {
+    if (args.isEmpty()) {
+      // The cast is kept so that an attribute of the wrong type still fails here.
+      return (MavenData) e.getAttribute(MAVEN_DATA_ATTRIBUTE);
+    }
+    throw new IllegalArgumentException("maven model data loader has no parameters");
+  }
+}
diff --git a/headerdefinition.xml b/headerdefinition.xml
index f3fd4287ed..34d178e654 100644
--- a/headerdefinition.xml
+++ b/headerdefinition.xml
@@ -40,4 +40,14 @@
<isMultiline>true</isMultiline>
<padLines>false</padLines>
</javadoc_style>
+ <!-- License header style for FreeMarker (.ftl) templates: the header text sits inside an FTL
+ comment block. Angle brackets are entity-escaped so this file stays well-formed XML, and the
+ detection patterns match the FTL comment delimiters instead of Java block comments. -->
+ <ftl_style>
+   <firstLine>&lt;#--</firstLine>
+   <beforeEachLine>// </beforeEachLine>
+   <endLine>--&gt;</endLine>
+   <firstLineDetectionPattern>(\s|\t)*&lt;#--.*$</firstLineDetectionPattern>
+   <lastLineDetectionPattern>.*--&gt;(\s|\t)*$</lastLineDetectionPattern>
+   <allowBlankLines>false</allowBlankLines>
+   <isMultiline>true</isMultiline>
+   <padLines>false</padLines>
+ </ftl_style>
</additionalHeaders>
diff --git a/pinot-common/pom.xml b/pinot-common/pom.xml
index 76bd905535..2bb5b8127b 100644
--- a/pinot-common/pom.xml
+++ b/pinot-common/pom.xml
@@ -98,6 +98,93 @@
</java>
</configuration>
</plugin>
+ <plugin>
+ <!-- Extract parser grammar template from calcite-core.jar and put
+ it under ${project.build.directory} where all freemarker templates are. -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>unpack-parser-template</id>
+ <!-- Bound to initialize so the extracted files exist before the generate-sources
+ executions of this module read them. -->
+ <phase>initialize</phase>
+ <goals>
+ <goal>unpack</goal>
+ </goals>
+ <configuration>
+ <artifactItems>
+ <artifactItem>
+ <groupId>org.apache.calcite</groupId>
+ <artifactId>calcite-core</artifactId>
+ <type>jar</type>
+ <overWrite>true</overWrite>
+ <outputDirectory>${project.build.directory}/</outputDirectory>
+ <!-- Only the grammar template and Calcite's default FMPP settings are extracted; the FMPP
+ execution of this module reads them from ${project.build.directory}/codegen. -->
+ <includes>**/Parser.jj,**/default_config.fmpp</includes>
+ </artifactItem>
+ </artifactItems>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <!-- Expands the unpacked Calcite grammar template with Pinot's config.fmpp and writes the
+ result to generated-sources/fmpp. -->
+ <groupId>org.apache.pinot</groupId>
+ <artifactId>pinot-fmpp-maven-plugin</artifactId>
+ <!-- NOTE(review): resolving ${project.version} presumably relies on the plugin module being
+ built before this module in the same build; confirm the module ordering. -->
+ <version>${project.version}</version>
+ <executions>
+ <execution>
+ <id>generate-fmpp-sources</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>generate</goal>
+ </goals>
+ <configuration>
+ <config>${project.basedir}/src/main/codegen/config.fmpp</config>
+ <output>${project.build.directory}/generated-sources/fmpp</output>
+ <templates>${project.build.directory}/codegen/templates</templates>
+ <!-- Two data sets: Pinot's own config.fmpp, plus Calcite's default_config.fmpp exposed under
+ the name "default" for declarations that config.fmpp does not provide. -->
+ <data>tdd(${project.basedir}/src/main/codegen/config.fmpp), default:tdd(${project.build.directory}/codegen/default_config.fmpp)</data>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <!-- Registers the JavaCC output directory as an additional source root so the generated
+ parser classes are compiled together with the module sources. -->
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>add-generated-sources</id>
+ <!-- process-sources comes after generate-sources, i.e. after the parser has been generated. -->
+ <phase>process-sources</phase>
+ <goals>
+ <goal>add-source</goal>
+ </goals>
+ <configuration>
+ <sources>
+ <source>${project.build.directory}/generated-sources/javacc</source>
+ </sources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <!-- Turns the FMPP-expanded Parser.jj into Java parser sources. -->
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>javacc-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <!-- Same phase as the FMPP execution; NOTE(review): this relies on executions within one
+ phase running in POM declaration order, so keep this plugin after the FMPP plugin. -->
+ <phase>generate-sources</phase>
+ <id>javacc</id>
+ <goals>
+ <goal>javacc</goal>
+ </goals>
+ <configuration>
+ <!-- Input is the FMPP output directory, not src/main. -->
+ <sourceDirectory>${project.build.directory}/generated-sources/fmpp</sourceDirectory>
+ <includes>
+ <include>**/Parser.jj</include>
+ </includes>
+ <lookAhead>2</lookAhead>
+ <isStatic>false</isStatic>
+ <outputDirectory>${project.build.directory}/generated-sources/javacc</outputDirectory>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
<dependencies>
diff --git a/pinot-common/src/main/codegen/config.fmpp b/pinot-common/src/main/codegen/config.fmpp
new file mode 100644
index 0000000000..c83241a6a1
--- /dev/null
+++ b/pinot-common/src/main/codegen/config.fmpp
@@ -0,0 +1,540 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+data: {
+ # Data declarations for this parser.
+ #
+ # Default declarations are in default_config.fmpp; if you do not include a
+ # declaration ('imports' or 'nonReservedKeywords', for example) in this file,
+ # FMPP will use the declaration from default_config.fmpp.
+ parser: {
+ # Generated parser implementation class package and name
+ package: "org.apache.pinot.sql.parsers.parser",
+ class: "SqlParserImpl",
+
+ # List of import statements.
+ imports: [
+ "com.google.common.collect.*"
+ "org.apache.pinot.sql.parsers.parser.*"
+ "java.util.*"
+ ]
+
+ # List of new keywords to add
+ keywords: [
+ "FILE"
+ "ARCHIVE"
+ ]
+
+ # List of non-reserved keywords to add
+ nonReservedKeywordsToAdd: [
+ # customized for Pinot
+ "FILE"
+ "ARCHIVE"
+
+ # The following keywords are reserved in core Calcite,
+ # are reserved in some version of SQL,
+ # but are not reserved in Babel.
+ #
+ # Words that are commented out (e.g. "AND") are still reserved.
+ # These are the most important reserved words, and SQL cannot be
+ # unambiguously parsed if they are not reserved. For example, if
+ # "INNER" is not reserved then in the query
+ #
+ # select * from emp inner join dept using (deptno)
+ #
+ # "inner" could be a table alias for "emp".
+ #
+ # TODO: remove unused/untested BABEL non-reserved keywords since
+ # we are only testing a small subset.
+ # @see: CalciteSqlCompilerTest:testReservedKeywords
+ "ABS"
+ "ABSOLUTE"
+ "ACTION"
+ "ADD"
+ "AFTER"
+ "ALL"
+ "ALLOCATE"
+ "ALLOW"
+ "ALTER"
+ "AND"
+# "ANY"
+ "ARE"
+ "ARRAY"
+# "ARRAY_AGG" # not a keyword in Calcite
+ "ARRAY_MAX_CARDINALITY"
+ "AS"
+ "ASC"
+ "ASENSITIVE"
+ "ASSERTION"
+ "ASYMMETRIC"
+ "AT"
+ "ATOMIC"
+ "AUTHORIZATION"
+ "AVG"
+ "BEFORE"
+ "BEGIN"
+ "BEGIN_FRAME"
+ "BEGIN_PARTITION"
+ "BETWEEN"
+ "BIGINT"
+ "BINARY"
+ "BIT"
+# "BIT_LENGTH" # not a keyword in Calcite
+ "BLOB"
+ "BOOLEAN"
+ "BOTH"
+ "BREADTH"
+ "BY"
+# "CALL"
+ "CALLED"
+ "CARDINALITY"
+ "CASCADE"
+ "CASCADED"
+# "CASE"
+ "CAST"
+ "CATALOG"
+ "CEIL"
+ "CEILING"
+ "CHAR"
+ "CHARACTER"
+ "CHARACTER_LENGTH"
+ "CHAR_LENGTH"
+ "CHECK"
+ "CLASSIFIER"
+ "CLOB"
+ "CLOSE"
+ "COALESCE"
+ "COLLATE"
+ "COLLATION"
+ "COLLECT"
+ "COLUMN"
+ "COMMIT"
+ "CONDITION"
+ "CONNECT"
+ "CONNECTION"
+ "CONSTRAINT"
+ "CONSTRAINTS"
+ "CONSTRUCTOR"
+ "CONTAINS"
+ "CONTINUE"
+ "CONVERT"
+ "CORR"
+ "CORRESPONDING"
+ "COUNT"
+ "COVAR_POP"
+ "COVAR_SAMP"
+# "CREATE"
+# "CROSS"
+ "CUBE"
+ "CUME_DIST"
+# "CURRENT"
+ "CURRENT_CATALOG"
+ "CURRENT_DATE"
+ "CURRENT_DEFAULT_TRANSFORM_GROUP"
+ "CURRENT_PATH"
+ "CURRENT_ROLE"
+ "CURRENT_ROW"
+ "CURRENT_SCHEMA"
+ "CURRENT_TIME"
+ "CURRENT_TIMESTAMP"
+ "CURRENT_TRANSFORM_GROUP_FOR_TYPE"
+ "CURRENT_USER"
+# "CURSOR"
+ "CYCLE"
+ "DATA"
+# "DATE"
+ "DAY"
+ "DEALLOCATE"
+ "DEC"
+ "DECIMAL"
+ "DECLARE"
+# "DEFAULT"
+ "DEFERRABLE"
+ "DEFERRED"
+# "DEFINE"
+# "DELETE"
+ "DENSE_RANK"
+ "DEPTH"
+ "DEREF"
+ "DESC"
+# "DESCRIBE" # must be reserved
+ "DESCRIPTOR"
+ "DETERMINISTIC"
+ "DIAGNOSTICS"
+ "DISALLOW"
+ "DISCONNECT"
+# "DISTINCT"
+# "DO" # not a keyword in Calcite
+ "DOMAIN"
+ "DOUBLE"
+# "DROP" # probably must be reserved
+ "DYNAMIC"
+ "EACH"
+ "ELEMENT"
+ "ELSE"
+# "ELSEIF" # not a keyword in Calcite
+ "EMPTY"
+ "END"
+# "END-EXEC" # not a keyword in Calcite, and contains '-'
+ "END_FRAME"
+ "END_PARTITION"
+ "EQUALS"
+ "ESCAPE"
+ "EVERY"
+# "EXCEPT" # must be reserved
+ "EXCEPTION"
+ "EXEC"
+ "EXECUTE"
+ "EXISTS"
+# "EXIT" # not a keyword in Calcite
+ "EXP"
+# "EXPLAIN" # must be reserved
+ "EXTEND"
+ "EXTERNAL"
+ "EXTRACT"
+ "FALSE"
+# "FETCH"
+ "FILTER"
+ "FIRST"
+ "FIRST_VALUE"
+ "FLOAT"
+ "FLOOR"
+ "FOR"
+ "FOREIGN"
+# "FOREVER" # not a keyword in Calcite
+ "FOUND"
+ "FRAME_ROW"
+ "FREE"
+# "FROM" # must be reserved
+# "FULL" # must be reserved
+ "FUNCTION"
+ "FUSION"
+ "GENERAL"
+ "GET"
+ "GLOBAL"
+ "GO"
+ "GOTO"
+# "GRANT"
+# "GROUP"
+# "GROUPING"
+ "GROUPS"
+# "HANDLER" # not a keyword in Calcite
+# "HAVING"
+ "HOLD"
+ "HOUR"
+ "IDENTITY"
+# "IF" # not a keyword in Calcite
+ "ILIKE"
+ "IMMEDIATE"
+ "IMMEDIATELY"
+ "IMPORT"
+# "IN"
+ "INDICATOR"
+ "INITIAL"
+ "INITIALLY"
+# "INNER"
+ "INOUT"
+ "INPUT"
+ "INSENSITIVE"
+# "INSERT"
+ "INT"
+ "INTEGER"
+# "INTERSECT"
+ "INTERSECTION"
+# "INTERVAL"
+# "INTO"
+ "IS"
+ "ISOLATION"
+# "ITERATE" # not a keyword in Calcite
+# "JOIN"
+ "JSON_ARRAY"
+ "JSON_ARRAYAGG"
+ "JSON_EXISTS"
+ "JSON_OBJECT"
+ "JSON_OBJECTAGG"
+ "JSON_QUERY"
+ "JSON_VALUE"
+# "KEEP" # not a keyword in Calcite
+ "KEY"
+ "LAG"
+ "LANGUAGE"
+ "LARGE"
+ "LAST"
+ "LAST_VALUE"
+# "LATERAL"
+ "LEAD"
+ "LEADING"
+# "LEAVE" # not a keyword in Calcite
+# "LEFT"
+ "LEVEL"
+ "LIKE"
+ "LIKE_REGEX"
+# "LIMIT"
+ "LN"
+ "LOCAL"
+ "LOCALTIME"
+ "LOCALTIMESTAMP"
+ "LOCATOR"
+# "LOOP" # not a keyword in Calcite
+ "LOWER"
+ "MAP"
+ "MATCH"
+ "MATCHES"
+ "MATCH_NUMBER"
+# "MATCH_RECOGNIZE"
+ "MAX"
+# "MAX_CARDINALITY" # not a keyword in Calcite
+ "MEASURES"
+ "MEMBER"
+# "MERGE"
+ "METHOD"
+ "MIN"
+# "MINUS"
+ "MINUTE"
+ "MOD"
+ "MODIFIES"
+ "MODULE"
+ "MONTH"
+ "MULTISET"
+ "NAMES"
+ "NATIONAL"
+# "NATURAL"
+ "NCHAR"
+ "NCLOB"
+# "NEW"
+# "NEXT"
+ "NO"
+ "NONE"
+ "NORMALIZE"
+ "NOT"
+ "NTH_VALUE"
+ "NTILE"
+# "NULL"
+ "NULLIF"
+ "NUMERIC"
+ "OBJECT"
+ "OCCURRENCES_REGEX"
+ "OCTET_LENGTH"
+ "OF"
+# "OFFSET"
+ "OLD"
+ "OMIT"
+# "ON"
+ "ONE"
+ "ONLY"
+ "OPEN"
+ "OPTION"
+ "OR"
+# "ORDER"
+ "ORDINALITY"
+ "OUT"
+# "OUTER"
+ "OUTPUT"
+# "OVER"
+ "OVERLAPS"
+ "OVERLAY"
+ "PAD"
+ "PARAMETER"
+ "PARTIAL"
+# "PARTITION"
+ "PATH"
+# "PATTERN"
+ "PER"
+ "PERCENT"
+ "PERCENTILE_CONT"
+ "PERCENTILE_DISC"
+ "PERCENT_RANK"
+ "PERIOD"
+ "PERMUTE"
+ "PORTION"
+ "POSITION"
+ "POSITION_REGEX"
+ "POWER"
+ "PRECEDES"
+ "PRECISION"
+ "PREPARE"
+ "PRESERVE"
+ "PREV"
+ "PRIMARY"
+ "PRIOR"
+ "PRIVILEGES"
+ "PROCEDURE"
+ "PUBLIC"
+# "RANGE"
+ "RANK"
+ "READ"
+ "READS"
+ "REAL"
+ "RECURSIVE"
+ "REF"
+ "REFERENCES"
+ "REFERENCING"
+ "REGR_AVGX"
+ "REGR_AVGY"
+ "REGR_COUNT"
+ "REGR_INTERCEPT"
+ "REGR_R2"
+ "REGR_SLOPE"
+ "REGR_SXX"
+ "REGR_SXY"
+ "REGR_SYY"
+ "RELATIVE"
+ "RELEASE"
+# "REPEAT" # not a keyword in Calcite
+ "RESET"
+# "RESIGNAL" # not a keyword in Calcite
+ "RESTRICT"
+ "RESULT"
+ "RETURN"
+ "RETURNS"
+ "REVOKE"
+# "RIGHT"
+ "RLIKE"
+ "ROLE"
+ "ROLLBACK"
+# "ROLLUP"
+ "ROUTINE"
+# "ROW"
+# "ROWS"
+ "ROW_NUMBER"
+ "RUNNING"
+ "SAVEPOINT"
+ "SCHEMA"
+ "SCOPE"
+ "SCROLL"
+ "SEARCH"
+ "SECOND"
+ "SECTION"
+ "SEEK"
+# "SELECT"
+ "SENSITIVE"
+ "SESSION"
+ "SESSION_USER"
+# "SET"
+# "SETS"
+ "SHOW"
+# "SIGNAL" # not a keyword in Calcite
+ "SIMILAR"
+ "SIZE"
+# "SKIP" # messes with JavaCC's <SKIP> token
+ "SMALLINT"
+# "SOME"
+ "SPACE"
+ "SPECIFIC"
+ "SPECIFICTYPE"
+ "SQL"
+# "SQLCODE" # not a keyword in Calcite
+# "SQLERROR" # not a keyword in Calcite
+ "SQLEXCEPTION"
+ "SQLSTATE"
+ "SQLWARNING"
+ "SQRT"
+ "START"
+ "STATE"
+ "STATIC"
+ "STDDEV_POP"
+ "STDDEV_SAMP"
+# "STREAM"
+ "SUBMULTISET"
+ "SUBSET"
+ "SUBSTRING"
+ "SUBSTRING_REGEX"
+ "SUCCEEDS"
+ "SUM"
+ "SYMMETRIC"
+ "SYSTEM"
+ "SYSTEM_TIME"
+ "SYSTEM_USER"
+# "TABLE"
+# "TABLESAMPLE"
+ "TEMPORARY"
+# "THEN"
+# "TIME"
+# "TIMESTAMP"
+ "TIMEZONE_HOUR"
+ "TIMEZONE_MINUTE"
+ "TINYINT"
+ "TO"
+ "TRAILING"
+ "TRANSACTION"
+ "TRANSLATE"
+ "TRANSLATE_REGEX"
+ "TRANSLATION"
+ "TREAT"
+ "TRIGGER"
+ "TRIM"
+ "TRIM_ARRAY"
+ "TRUE"
+ "TRUNCATE"
+ "UESCAPE"
+ "UNDER"
+# "UNDO" # not a keyword in Calcite
+# "UNION"
+ "UNIQUE"
+ "UNKNOWN"
+# "UNNEST"
+# "UNTIL" # not a keyword in Calcite
+# "UPDATE"
+ "UPPER"
+ "UPSERT"
+ "USAGE"
+ "USER"
+# "USING"
+ "VALUE"
+# "VALUES"
+ "VALUE_OF"
+ "VARBINARY"
+ "VARCHAR"
+ "VARYING"
+ "VAR_POP"
+ "VAR_SAMP"
+ "VERSION"
+ "VERSIONING"
+# "VERSIONS" # not a keyword in Calcite
+ "VIEW"
+# "WHEN"
+ "WHENEVER"
+# "WHERE"
+# "WHILE" # not a keyword in Calcite
+ "WIDTH_BUCKET"
+# "WINDOW"
+# "WITH"
+ "WITHIN"
+ "WITHOUT"
+ "WORK"
+ "WRITE"
+ "YEAR"
+ "ZONE"
+ ]
+
+ # List of extended statement syntax to add
+ statementParserMethods: [
+ "SqlInsertFromFile()"
+ ]
+
+ # List of files in the @includes directory that contain parser method implementations
+ implementationFiles: [
+ "parserImpls.ftl"
+ ],
+ }
+}
+
+freemarkerLinks: {
+ includes: includes/
+}
diff --git a/pinot-common/src/main/codegen/includes/parserImpls.ftl b/pinot-common/src/main/codegen/includes/parserImpls.ftl
new file mode 100644
index 0000000000..57d3e2b8af
--- /dev/null
+++ b/pinot-common/src/main/codegen/includes/parserImpls.ftl
@@ -0,0 +1,103 @@
+<#--
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+-->
+
+/**
+ * Parses one data file definition, ( FILE | ARCHIVE ) 'file_uri', and appends the URI string
+ * literal to the given list.
+ *
+ * Only the literal is kept: which of the two keywords was used is not recorded.
+ */
+private void DataFileDef(List<SqlNode> list) :
+{
+    SqlNode uri;
+}
+{
+    ( <FILE> | <ARCHIVE> )
+    // StringLiteral() is written as a grammar expansion (not a call inside a Java action)
+    // so that JavaCC sees it as part of this production.
+    uri = StringLiteral()
+    {
+        list.add(uri);
+    }
+}
+
+/**
+ * Parses FROM followed by one or more comma-separated data file definitions.
+ *
+ * Returns a node list of the file URI literals whose position spans from the FROM keyword to
+ * the last token consumed.
+ */
+SqlNodeList DataFileDefList() :
+{
+    SqlParserPos pos;
+    List<SqlNode> list = Lists.newArrayList();
+}
+{
+    <FROM> { pos = getPos(); }
+    DataFileDef(list)
+    ( <COMMA> DataFileDef(list) )*
+    {
+        return new SqlNodeList(list, pos.plus(getPos()));
+    }
+}
+
+/**
+ * INSERT INTO [db_name.]table_name
+ * [ FROM ( FILE | ARCHIVE ) 'file_uri' [, ( FILE | ARCHIVE ) 'file_uri' ]* ]
+ *
+ * Builds a SqlInsertFromFile node from the optional database name, the table name and the
+ * optional list of file URIs.
+ *
+ * NOTE(review): the FROM clause is optional in this production, so a bare
+ * "INSERT INTO table_name" is accepted and yields a node whose file list is null; confirm
+ * that this is intended.
+ */
+SqlInsertFromFile SqlInsertFromFile() :
+{
+ SqlParserPos pos;
+ SqlIdentifier dbName = null;
+ SqlIdentifier tableName;
+ SqlNodeList fileList = null;
+}
+{
+ <INSERT> { pos = getPos(); }
+ <INTO>
+ [
+ dbName = SimpleIdentifier()
+ <DOT>
+ ]
+
+ tableName = SimpleIdentifier()
+ [
+ fileList = DataFileDefList()
+ ]
+ {
+ return new SqlInsertFromFile(pos, dbName, tableName, fileList);
+ }
+}
+
+/**
+ * Parses a single statement with SqlStmt() and appends it to the given list.
+ * Used by SqlStmtsEof() for every statement after the first one.
+ */
+private void SqlStatementList(SqlNodeList list) :
+{
+}
+{
+ {
+ list.add(SqlStmt());
+ }
+}
+
+/**
+ * Parses one or more statements separated by semicolons, followed by an optional trailing
+ * semicolon and end of input, and returns them as a node list positioned at the start of
+ * the input.
+ */
+SqlNodeList SqlStmtsEof() :
+{
+ SqlParserPos pos;
+ SqlNodeList stmts;
+}
+{
+ {
+ pos = getPos();
+ stmts = new SqlNodeList(pos);
+ stmts.add(SqlStmt());
+ }
+ // Only loop when the semicolon is followed by the start of another statement; otherwise the
+ // semicolon is left for the optional trailing [ <SEMICOLON> ] below.
+ ( LOOKAHEAD(2, <SEMICOLON> SqlStmt()) <SEMICOLON> SqlStatementList(stmts) )*
+ [ <SEMICOLON> ] <EOF>
+ {
+ return stmts;
+ }
+}
diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
index 98c1e5fefa..1a1a1ac46e 100644
--- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
@@ -19,6 +19,7 @@
package org.apache.pinot.sql.parsers;
import com.google.common.annotations.VisibleForTesting;
+import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -29,7 +30,7 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.calcite.config.Lex;
+import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.sql.SqlBasicCall;
import org.apache.calcite.sql.SqlDataTypeSpec;
import org.apache.calcite.sql.SqlExplain;
@@ -45,9 +46,8 @@ import org.apache.calcite.sql.SqlSelectKeyword;
import org.apache.calcite.sql.fun.SqlBetweenOperator;
import org.apache.calcite.sql.fun.SqlCase;
import org.apache.calcite.sql.fun.SqlLikeOperator;
-import org.apache.calcite.sql.parser.SqlParseException;
+import org.apache.calcite.sql.parser.SqlAbstractParserImpl;
import org.apache.calcite.sql.parser.SqlParser;
-import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl;
import org.apache.calcite.sql.validate.SqlConformanceEnum;
import org.apache.commons.collections.CollectionUtils;
import org.apache.pinot.common.request.DataSource;
@@ -60,6 +60,7 @@ import org.apache.pinot.common.utils.request.RequestUtils;
import org.apache.pinot.pql.parsers.pql2.ast.FilterKind;
import org.apache.pinot.segment.spi.AggregationFunctionType;
import org.apache.pinot.spi.utils.Pairs;
+import org.apache.pinot.sql.parsers.parser.SqlParserImpl;
import org.apache.pinot.sql.parsers.rewriter.QueryRewriter;
import org.apache.pinot.sql.parsers.rewriter.QueryRewriterFactory;
import org.slf4j.Logger;
@@ -73,18 +74,6 @@ public class CalciteSqlParser {
public static final List<QueryRewriter> QUERY_REWRITERS = new ArrayList<>(QueryRewriterFactory.getQueryRewriters());
private static final Logger LOGGER = LoggerFactory.getLogger(CalciteSqlParser.class);
- /** Lexical policy similar to MySQL with ANSI_QUOTES option enabled. (To be
- * precise: MySQL on Windows; MySQL on Linux uses case-sensitive matching,
- * like the Linux file system.) The case of identifiers is preserved whether
- * or not they quoted; after which, identifiers are matched
- * case-insensitively. Double quotes allow identifiers to contain
- * non-alphanumeric characters. */
- private static final Lex PINOT_LEX = Lex.MYSQL_ANSI;
-
- // BABEL is a very liberal conformance value that allows anything supported by any dialect
- private static final SqlParser.Config PARSER_CONFIG =
- SqlParser.configBuilder().setLex(PINOT_LEX).setConformance(SqlConformanceEnum.BABEL)
- .setParserFactory(SqlBabelParserImpl.FACTORY).build();
// To Keep the backward compatibility with 'OPTION' Functionality in PQL, which is used to
// provide more hints for query processing.
//
@@ -95,6 +84,7 @@ public class CalciteSqlParser {
// `OPTION (<k1> = <v1>, <k2> = <v2>, <k3> = <v3>)`
// or
// `OPTION (<k1> = <v1>) OPTION (<k2> = <v2>) OPTION (<k3> = <v3>)`
+ // TODO: move to use parser syntax extension: `OPTION` `(` `<key>` = `<value>` [, `<key>` = `<value>`]* `)`
private static final Pattern OPTIONS_REGEX_PATTEN =
Pattern.compile("option\\s*\\(([^\\)]+)\\)", Pattern.CASE_INSENSITIVE);
@@ -131,11 +121,11 @@ public class CalciteSqlParser {
sql = removeOptionsFromSql(sql);
}
- SqlParser sqlParser = SqlParser.create(sql, PARSER_CONFIG);
SqlNode sqlNode;
- try {
- sqlNode = sqlParser.parseQuery();
- } catch (SqlParseException e) {
+ try (StringReader inStream = new StringReader(sql)) {
+ SqlParserImpl sqlParser = newSqlParser(inStream);
+ sqlNode = sqlParser.parseSqlStmtEof();
+ } catch (Throwable e) {
throw new SqlCompilationException("Caught exception while parsing query: " + sql, e);
}
@@ -311,16 +301,29 @@ public class CalciteSqlParser {
* @throws SqlCompilationException if String is not a valid expression.
*/
public static Expression compileToExpression(String expression) {
- SqlParser sqlParser = SqlParser.create(expression, PARSER_CONFIG);
SqlNode sqlNode;
- try {
- sqlNode = sqlParser.parseExpression();
- } catch (SqlParseException e) {
+ try (StringReader inStream = new StringReader(expression)) {
+ SqlParserImpl sqlParser = newSqlParser(inStream);
+ sqlNode = sqlParser.parseSqlExpressionEof();
+ } catch (Throwable e) {
throw new SqlCompilationException("Caught exception while parsing expression: " + expression, e);
}
return toExpression(sqlNode);
}
+  @VisibleForTesting
+  static SqlParserImpl newSqlParser(StringReader inStream) {
+    SqlParserImpl parser = new SqlParserImpl(inStream);
+    parser.setTabSize(1);
+    parser.setQuotedCasing(Casing.UNCHANGED); // identifier case is kept exactly as written in the query
+    parser.setUnquotedCasing(Casing.UNCHANGED);
+    parser.setIdentifierMaxLength(SqlParser.DEFAULT_IDENTIFIER_MAX_LENGTH);
+    // TODO: convert to MySQL conformance once we have retired most of the un-tested BABEL tokens
+    parser.setConformance(SqlConformanceEnum.BABEL);
+    parser.switchTo(SqlAbstractParserImpl.LexicalState.DQID);
+    return parser;
+  }
+
private static void setOptions(PinotQuery pinotQuery, List<String> optionsStatements) {
if (optionsStatements.isEmpty()) {
return;
diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/SqlInsertFromFile.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/SqlInsertFromFile.java
new file mode 100644
index 0000000000..3b2df6f778
--- /dev/null
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/SqlInsertFromFile.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.sql.parsers.parser;
+
+import java.util.Arrays;
+import java.util.List;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlIdentifier;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlNodeList;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.SqlSpecialOperator;
+import org.apache.calcite.sql.SqlWriter;
+import org.apache.calcite.sql.parser.SqlParserPos;
+
+
+/**
+ * Calcite {@link SqlCall} extension representing an INSERT INTO statement whose source is a list of files.
+ * NOTE(review): {@link #unparse} wraps the file list in parentheses, unlike the syntax below - confirm intended.
+ * <p>Syntax: INSERT INTO [db_name.]table_name FROM [ FILE | ARCHIVE ] 'file_uri' [, [ FILE | ARCHIVE ] 'file_uri' ]
+ */
+public class SqlInsertFromFile extends SqlCall {
+  private static final SqlSpecialOperator OPERATOR = new SqlSpecialOperator("UDF", SqlKind.OTHER_DDL);
+  private final SqlIdentifier _dbName; // may be null; unparse() then omits the "db." prefix
+  private final SqlIdentifier _tableName;
+  private final SqlNodeList _fileList; // may be null; unparse() then omits the FROM clause
+
+  public SqlInsertFromFile(SqlParserPos pos, SqlIdentifier dbName, SqlIdentifier tableName, SqlNodeList fileList) {
+    super(pos);
+    _dbName = dbName;
+    _tableName = tableName;
+    _fileList = fileList;
+  }
+
+  @Override
+  public void unparse(SqlWriter writer, int leftPrec, int rightPrec) {
+    UnparseUtils u = new UnparseUtils(writer, leftPrec, rightPrec);
+    u.keyword("INSERT", "INTO");
+    if (_dbName != null) {
+      u.node(_dbName).keyword(".");
+    }
+    u.node(_tableName);
+    if (_fileList != null) {
+      u.keyword("FROM").nodeList(_fileList);
+    }
+  }
+
+  @Override
+  public SqlOperator getOperator() {
+    return OPERATOR;
+  }
+
+  @Override
+  public List<SqlNode> getOperandList() {
+    return Arrays.asList(_dbName, _tableName, _fileList); // Arrays.asList tolerates the null operands
+  }
+}
diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/UnparseUtils.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/UnparseUtils.java
new file mode 100644
index 0000000000..4d85337c73
--- /dev/null
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/UnparseUtils.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.sql.parsers.parser;
+
+import java.util.Arrays;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlNodeList;
+import org.apache.calcite.sql.SqlWriter;
+
+
+/**
+ * Fluent helper that writes keywords, a single {@link SqlNode} or a parenthesized, comma-separated
+ * {@link SqlNodeList} using the {@link SqlWriter} and precedences captured at construction time.
+ *
+ * @see SqlNode#unparse(SqlWriter, int, int)
+ */
+class UnparseUtils {
+  private final SqlWriter _writer;
+  private final int _leftPrec;
+  private final int _rightPrec;
+
+  UnparseUtils(SqlWriter writer, int leftPrec, int rightPrec) {
+    _writer = writer;
+    _leftPrec = leftPrec;
+    _rightPrec = rightPrec;
+  }
+
+  UnparseUtils keyword(String... keywords) {
+    Arrays.asList(keywords).forEach(_writer::keyword);
+    return this;
+  }
+
+  UnparseUtils node(SqlNode n) {
+    n.unparse(_writer, _leftPrec, _rightPrec);
+    return this;
+  }
+
+  UnparseUtils nodeList(SqlNodeList l) {
+    _writer.keyword("(");
+    // A "," goes in front of every element except the first; an empty list yields just "(" and ")".
+    for (int idx = 0; idx < l.size(); idx++) {
+      if (idx > 0) {
+        _writer.keyword(",");
+      }
+      l.get(idx).unparse(_writer, _leftPrec, _rightPrec);
+    }
+    _writer.keyword(")");
+    return this;
+  }
+}
diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
index ae0eae0e19..83cde06edc 100644
--- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
@@ -18,14 +18,15 @@
*/
package org.apache.pinot.sql.parsers;
+import java.io.StringReader;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
+import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNumericLiteral;
-import org.apache.calcite.sql.parser.SqlParseException;
import org.apache.pinot.common.request.AggregationInfo;
import org.apache.pinot.common.request.BrokerRequest;
import org.apache.pinot.common.request.Expression;
@@ -40,6 +41,8 @@ import org.apache.pinot.common.utils.request.RequestUtils;
import org.apache.pinot.pql.parsers.PinotQuery2BrokerRequestConverter;
import org.apache.pinot.pql.parsers.pql2.ast.FilterKind;
import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.sql.parsers.parser.SqlInsertFromFile;
+import org.apache.pinot.sql.parsers.parser.SqlParserImpl;
import org.apache.pinot.sql.parsers.rewriter.CompileTimeFunctionsInvoker;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -445,9 +448,8 @@ public class CalciteSqlCompilerTest {
literal = pinotQuery.getSelectList().get(0).getLiteral();
Assert.assertNull(literal);
- pinotQuery = CalciteSqlParser
- .compileToPinotQuery("select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), "
- + "decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from mytable");
+ pinotQuery = CalciteSqlParser.compileToPinotQuery("select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), "
+ + "decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from mytable");
Literal literal1 = pinotQuery.getSelectList().get(0).getLiteral();
Literal literal2 = pinotQuery.getSelectList().get(1).getLiteral();
Assert.assertNotNull(literal1);
@@ -460,8 +462,8 @@ public class CalciteSqlCompilerTest {
Assert.assertEquals(tempBrokerRequest.getSelections().getSelectionColumns().get(1),
String.format("'%s'", literal2.getFieldValue().toString()));
- pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT count(*) from mytable "
- + "where bar = encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')");
+ pinotQuery = CalciteSqlParser.compileToPinotQuery(
+ "SELECT count(*) from mytable " + "where bar = encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')");
literal = pinotQuery.getSelectList().get(0).getLiteral();
Assert.assertNull(literal);
@@ -1003,8 +1005,6 @@ public class CalciteSqlCompilerTest {
Assert.fail("Query should have failed compilation");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
- Assert.assertTrue(e.getCause() instanceof SqlParseException);
- Assert.assertTrue(e.getCause().getMessage().contains("Encountered \", DISTINCT\" at line 1, column 15."));
}
// not supported by Calcite SQL (this is in compliance with SQL standard)
@@ -1014,8 +1014,6 @@ public class CalciteSqlCompilerTest {
Assert.fail("Query should have failed compilation");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
- Assert.assertTrue(e.getCause() instanceof SqlParseException);
- Assert.assertTrue(e.getCause().getMessage().contains("Encountered \", DISTINCT\" at line 1, column 10."));
}
// not supported by Calcite SQL (this is in compliance with SQL standard)
@@ -1025,8 +1023,6 @@ public class CalciteSqlCompilerTest {
Assert.fail("Query should have failed compilation");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
- Assert.assertTrue(e.getCause() instanceof SqlParseException);
- Assert.assertTrue(e.getCause().getMessage().contains("Encountered \", DISTINCT\" at line 1, column 18."));
}
// The following query although a valid SQL syntax is not
@@ -1559,9 +1555,7 @@ public class CalciteSqlCompilerTest {
Assert.fail("Query should have failed to compile");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
- Assert.assertTrue(e.getCause() instanceof SqlParseException);
String message = e.getCause().getMessage();
- Assert.assertTrue(message.startsWith("Encountered") && message.contains("table"));
}
// date - need to escape
try {
@@ -1569,9 +1563,6 @@ public class CalciteSqlCompilerTest {
Assert.fail("Query should have failed to compile");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
- Assert.assertTrue(e.getCause() instanceof SqlParseException);
- String message = e.getCause().getMessage();
- Assert.assertTrue(message.startsWith("Encountered") && message.contains("Date"));
}
// timestamp - need to escape
@@ -1580,9 +1571,6 @@ public class CalciteSqlCompilerTest {
Assert.fail("Query should have failed to compile");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
- Assert.assertTrue(e.getCause() instanceof SqlParseException);
- String message = e.getCause().getMessage();
- Assert.assertTrue(message.startsWith("Encountered") && message.contains("timestamp"));
}
// time - need to escape
@@ -1591,9 +1579,6 @@ public class CalciteSqlCompilerTest {
Assert.fail("Query should have failed to compile");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
- Assert.assertTrue(e.getCause() instanceof SqlParseException);
- String message = e.getCause().getMessage();
- Assert.assertTrue(message.startsWith("Encountered") && message.contains("time"));
}
// group - need to escape
@@ -1602,9 +1587,6 @@ public class CalciteSqlCompilerTest {
Assert.fail("Query should have failed to compile");
} catch (Exception e) {
Assert.assertTrue(e instanceof SqlCompilationException);
- Assert.assertTrue(e.getCause() instanceof SqlParseException);
- String message = e.getCause().getMessage();
- Assert.assertTrue(message.startsWith("Encountered") && message.contains("group"));
}
// escaping the above works
@@ -1998,15 +1980,14 @@ public class CalciteSqlCompilerTest {
Assert.assertEquals(expression.getLiteral().getFieldValue(),
"key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253");
- expression = CalciteSqlParser
- .compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')");
+ expression =
+ CalciteSqlParser.compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')");
Assert.assertNotNull(expression.getFunctionCall());
pinotQuery.setFilterExpression(expression);
pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery);
expression = pinotQuery.getFilterExpression();
Assert.assertNotNull(expression.getLiteral());
- Assert.assertEquals(expression.getLiteral().getFieldValue(),
- "key1=value 1&key2=value@!$2&key3=value%3");
+ Assert.assertEquals(expression.getLiteral().getFieldValue(), "key1=value 1&key2=value@!$2&key3=value%3");
expression = CalciteSqlParser.compileToExpression("reverse(playerName)");
Assert.assertNotNull(expression.getFunctionCall());
@@ -2656,4 +2637,24 @@ public class CalciteSqlCompilerTest {
Assert.expectThrows(SqlCompilationException.class,
() -> CalciteSqlParser.compileToPinotQuery("SELECT UPPER(col1), avg(col2) from foo"));
}
+
+  /**
+   * Verifies the custom grammar in src/main/codegen/includes/parserImpls.ftl parses INSERT INTO ... FROM FILE.
+   */
+  @Test
+  public void testParserExtensionImpl() {
+    SqlNode parsed =
+        testSqlWithCustomSqlParser("INSERT INTO db.tbl FROM FILE 'file:///tmp/file1', FILE 'file:///tmp/file2'");
+    Assert.assertTrue(parsed instanceof SqlInsertFromFile);
+  }
+
+  private static SqlNode testSqlWithCustomSqlParser(String sqlString) {
+    SqlNode parsed = null;
+    try (StringReader reader = new StringReader(sqlString)) {
+      parsed = CalciteSqlParser.newSqlParser(reader).parseSqlStmtEof();
+    } catch (Exception e) {
+      Assert.fail("test custom sql parser failed", e); // always throws, so a parse failure never returns null
+    }
+    return parsed;
+  }
}
diff --git a/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml b/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml
index abba9e2dc2..095cfca22c 100644
--- a/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml
+++ b/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml
@@ -86,6 +86,93 @@
</java>
</configuration>
</plugin>
+ <plugin>
+ <!-- Unpack the parser grammar template (Parser.jj) and default_config.fmpp from calcite-core.jar
+ into ${project.build.directory}, where the fmpp plugin looks up its templates and default data. -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>unpack-parser-template</id>
+ <phase>initialize</phase>
+ <goals>
+ <goal>unpack</goal>
+ </goals>
+ <configuration>
+ <artifactItems>
+ <artifactItem>
+ <groupId>org.apache.calcite</groupId>
+ <artifactId>calcite-core</artifactId>
+ <type>jar</type>
+ <overWrite>true</overWrite>
+ <outputDirectory>${project.build.directory}/</outputDirectory>
+ <includes>**/Parser.jj,**/default_config.fmpp</includes>
+ </artifactItem>
+ </artifactItems>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.pinot</groupId>
+ <artifactId>pinot-fmpp-maven-plugin</artifactId>
+ <version>${project.version}</version> <!-- built in this reactor as module contrib/pinot-fmpp-maven-plugin -->
+ <executions>
+ <execution>
+ <id>generate-fmpp-sources</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>generate</goal>
+ </goals>
+ <configuration>
+ <config>${project.basedir}/src/main/codegen/config.fmpp</config>
+ <output>${project.build.directory}/generated-sources/fmpp</output> <!-- sourceDirectory of the javacc execution -->
+ <templates>${project.build.directory}/codegen/templates</templates>
+ <data>tdd(${project.basedir}/src/main/codegen/config.fmpp), default:tdd(${project.build.directory}/codegen/default_config.fmpp)</data>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>add-generated-sources</id>
+ <phase>process-sources</phase>
+ <goals>
+ <goal>add-source</goal>
+ </goals>
+ <configuration>
+ <sources>
+ <source>${project.build.directory}/generated-sources/javacc</source> <!-- outputDirectory of the javacc execution -->
+ </sources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>javacc-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>javacc</id>
+            <phase>generate-sources</phase> <!-- same phase as generate-fmpp-sources, which is declared earlier -->
+            <goals>
+              <goal>javacc</goal>
+            </goals>
+            <configuration>
+              <sourceDirectory>${project.build.directory}/generated-sources/fmpp</sourceDirectory>
+              <includes>
+                <include>**/Parser.jj</include>
+              </includes>
+              <lookAhead>2</lookAhead> <!-- global LOOKAHEAD used for the generated parser -->
+              <isStatic>false</isStatic> <!-- the parser is instantiated per query, so it must not be static -->
+              <outputDirectory>${project.build.directory}/generated-sources/javacc</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
</plugins>
</build>
<dependencies>
diff --git a/pom.xml b/pom.xml
index 7ff2080020..6065d06353 100644
--- a/pom.xml
+++ b/pom.xml
@@ -56,6 +56,7 @@
<module>pinot-connectors</module>
<module>pinot-segment-local</module>
<module>pinot-compatibility-verifier</module>
+ <module>contrib/pinot-fmpp-maven-plugin</module>
</modules>
<licenses>
@@ -1662,6 +1663,8 @@
<config>SCRIPT_STYLE</config>
<queries>SCRIPT_STYLE</queries>
<results>SCRIPT_STYLE</results>
+ <fmpp>SCRIPT_STYLE</fmpp>
+ <ftl>FTL_STYLE</ftl>
<MockMaker>SCRIPT_STYLE</MockMaker>
<appAssemblerScriptTemplate>SCRIPT_STYLE</appAssemblerScriptTemplate>
<pql>SCRIPT_STYLE</pql>
@@ -1824,6 +1827,28 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <version>3.3.0</version>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>javacc-maven-plugin</artifactId>
+ <version>2.6</version>
+ <dependencies> <!-- plugin-level dependency: runs javacc-maven-plugin with JavaCC 7.0.10 -->
+ <dependency>
+ <groupId>net.java.dev.javacc</groupId>
+ <artifactId>javacc</artifactId>
+ <version>7.0.10</version>
+ </dependency>
+ </dependencies>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-plugin-plugin</artifactId>
+ <version>3.6.0</version>
+ </plugin>
</plugins>
</build>
<reporting>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org