You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2021/07/08 00:09:14 UTC

[incubator-pinot] branch master updated: Validate parsing multi value delimiter as unicode (#7133)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new c42aef6  Validate parsing multi value delimiter as unicode (#7133)
c42aef6 is described below

commit c42aef6a52f7a888435d3dd97dc4091689966221
Author: Ken Krugler <ke...@transpac.com>
AuthorDate: Wed Jul 7 17:08:51 2021 -0700

    Validate parsing multi value delimiter as unicode (#7133)
    
    Add a new test to validate proper handling of Unicode code points in YAML (since this behavior changed from 0.6 to 0.7)
---
 .../ingestion/batch/IngestionJobLauncherTest.java  |  9 ++++
 .../test/resources/ingestion_job_spec_unicode.yaml | 48 ++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java
index 23110e8..747bcdf 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/ingestion/batch/IngestionJobLauncherTest.java
@@ -42,6 +42,15 @@ public class IngestionJobLauncherTest {
   }
 
   @Test
+  public void testIngestionJobLauncherWithUnicodeCharForMultivalueFieldDelimiter()
+      throws IOException, ClassNotFoundException {
+    // The YAML escape for U+FFF0 must load as that single character (per the commit, this changed from 0.6 to 0.7).
+    String path = GroovyTemplateUtils.class.getClassLoader().getResource("ingestion_job_spec_unicode.yaml").getFile();
+    SegmentGenerationJobSpec spec = IngestionJobLauncher.getSegmentGenerationJobSpec(path, null, null);
+    Assert.assertEquals(spec.getRecordReaderSpec().getConfigs().get("multiValueDelimiter"), "\ufff0");
+  }
+
+  @Test
   public void testIngestionJobLauncherWithTemplateAndPropertyFile()
       throws IOException, ClassNotFoundException {
     SegmentGenerationJobSpec spec = IngestionJobLauncher.getSegmentGenerationJobSpec(
diff --git a/pinot-spi/src/test/resources/ingestion_job_spec_unicode.yaml b/pinot-spi/src/test/resources/ingestion_job_spec_unicode.yaml
new file mode 100644
index 0000000..4049da8
--- /dev/null
+++ b/pinot-spi/src/test/resources/ingestion_job_spec_unicode.yaml
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+executionFrameworkSpec:
+  name: 'standalone'
+  segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
+  segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
+jobType: SegmentCreationAndTarPush
+
+inputDirURI: 'file:///path/to/input/'
+includeFileNamePattern: 'glob:**/*.gz'
+outputDirURI: 'file:///path/to/output/'
+overwriteOutput: true
+segmentCreationJobParallelism: 100
+pinotFSSpecs:
+  - scheme: file
+    className: org.apache.pinot.spi.filesystem.LocalPinotFS
+recordReaderSpec:
+  dataFormat: 'csv'
+  className: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReader'
+  configClassName: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig'
+  configs:
+    multiValueDelimiter: "\ufff0"  # double-quoted so the YAML escape is decoded to the single character U+FFF0
+tableSpec:
+  tableName: 'myTable'
+  schemaURI: 'http://localhost:9000/tables/myTable/schema'
+  tableConfigURI: 'http://localhost:9000/tables/myTable'
+pinotClusterSpecs:
+  - controllerURI: 'localhost:9000'
+pushJobSpec:
+  pushAttempts: 2
+  pushRetryIntervalMillis: 1000
\ No newline at end of file

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org