You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu> on 2023/08/24 03:52:20 UTC

Change in asterixdb[master]: WIP: support copy statement with csv files

From Peeyush Gupta <pe...@couchbase.com>:

Peeyush Gupta has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17739 )


Change subject: WIP: support copy statement with csv files
......................................................................

WIP: support copy statement with csv files

Change-Id: I7ac452559ab02e35f5b5fa84fbd0853a08b2bc86
---
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
3 files changed, 40 insertions(+), 9 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/39/17739/1

diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
index 09f9697..4e52321 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -46,13 +46,20 @@
     }
 
     private DelimitedDataParser createParser(IHyracksTaskContext ctx) throws HyracksDataException {
-        IValueParserFactory[] valueParserFactories = ExternalDataUtils.getValueParserFactories(recordType);
         char delimiter = ExternalDataUtils.validateGetDelimiter(configuration);
         char quote = ExternalDataUtils.validateGetQuote(configuration, delimiter);
         boolean hasHeader = ExternalDataUtils.hasHeader(configuration);
         String nullString = configuration.get(ExternalDataConstants.KEY_NULL_STR);
-        return new DelimitedDataParser(ctx, valueParserFactories, delimiter, quote, hasHeader, recordType,
-                ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM), nullString);
+        if (configuration.containsKey(ExternalDataConstants.KEY_FIELDS)) {
+            ARecordType recordTypeFromConfiguration = ExternalDataUtils.getRecordType(configuration);
+            return new DelimitedDataParser(ctx, ExternalDataUtils.getValueParserFactories(recordTypeFromConfiguration),
+                    delimiter, quote, hasHeader, recordTypeFromConfiguration,
+                    ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM), nullString);
+        } else {
+            IValueParserFactory[] valueParserFactories = ExternalDataUtils.getValueParserFactories(recordType);
+            return new DelimitedDataParser(ctx, valueParserFactories, delimiter, quote, hasHeader, recordType,
+                    ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM), nullString);
+        }
     }
 
     @Override
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index bc2ce63..a43af0f 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -133,6 +133,7 @@
     public static final String KEY_REDACT_WARNINGS = "redact-warnings";
     public static final String KEY_REQUESTED_FIELDS = "requested-fields";
     public static final String KEY_EXTERNAL_SCAN_BUFFER_SIZE = "external-scan-buffer-size";
+    public static final String KEY_FIELDS = "fields";
 
     /**
      * Keys for adapter name
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 2605fe7..22e51ee 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -70,12 +70,7 @@
 import org.apache.asterix.external.util.ExternalDataConstants.ParquetOptions;
 import org.apache.asterix.external.util.aws.s3.S3Constants;
 import org.apache.asterix.external.util.aws.s3.S3Utils;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.AUnionType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.types.TypeTagUtil;
+import org.apache.asterix.om.types.*;
 import org.apache.asterix.runtime.evaluators.common.NumberUtils;
 import org.apache.asterix.runtime.projection.ExternalDatasetProjectionFiltrationInfo;
 import org.apache.asterix.runtime.projection.FunctionCallInformation;
@@ -106,6 +101,9 @@
     private static final Map<ATypeTag, IValueParserFactory> valueParserFactoryMap = new EnumMap<>(ATypeTag.class);
     private static final int DEFAULT_MAX_ARGUMENT_SZ = 1024 * 1024;
     private static final int HEADER_FUDGE = 64;
+    private static final String COMMA = ",";
+    private static final String DOT = ".";
+    private static final String COLON = ":";
 
     static {
         valueParserFactoryMap.put(ATypeTag.INTEGER, IntegerParserFactory.INSTANCE);
@@ -1001,4 +999,20 @@
             ExternalDataPrefix externalDataPrefix, IExternalFilterEvaluator evaluator) throws HyracksDataException {
         return !key.endsWith("/") && predicate.test(matchers, key) && externalDataPrefix.evaluate(key, evaluator);
     }
+
+    public static ARecordType getRecordType(Map<String, String> configuration) throws HyracksDataException {
+        String fieldsString = configuration.get(ExternalDataConstants.KEY_FIELDS);
+        if (fieldsString == null) {
+            throw new HyracksDataException("fields key missing");
+        } else {
+            List<String> fields = new ArrayList<>();
+            List<IAType> fieldTypes = new ArrayList<>();
+            Arrays.stream(fieldsString.strip().split(COMMA)).sequential().forEach(e -> {
+                String[] entry = e.split(COLON);
+                fields.add(entry[0]);
+                fieldTypes.add(BuiltinTypeMap.getBuiltinType(entry[1]));
+            });
+            return new ARecordType("root", fields.toArray(new String[0]), fieldTypes.toArray(new IAType[0]), true);
+        }
+    }
 }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17739
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I7ac452559ab02e35f5b5fa84fbd0853a08b2bc86
Gerrit-Change-Number: 17739
Gerrit-PatchSet: 1
Gerrit-Owner: Peeyush Gupta <pe...@couchbase.com>
Gerrit-MessageType: newchange