You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mh...@apache.org on 2023/03/28 01:46:45 UTC

[asterixdb] branch master updated: [ASTERIXDB-3153][OTH] Make the default storage format configurable

This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 5ba7d8152b [ASTERIXDB-3153][OTH] Make the default storage format configurable
5ba7d8152b is described below

commit 5ba7d8152b6f157676bd03820eb917009cb155a0
Author: Wail Alkowaileet <wa...@gmail.com>
AuthorDate: Sun Mar 26 10:30:07 2023 -0700

    [ASTERIXDB-3153][OTH] Make the default storage format configurable
    
    - user model changes: yes
    - storage format changes: no
    - interface changes: no
    
    Details:
    Currently, columnar datasets must be declared explicitly using
    the WITH clause. We should extend AsterixDB's capability
    to configure the default storage format (either row or column).
    
    Change-Id: I173dd026528aa4d35dbdddcf1de4a55249c19caf
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17447
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Wail Alkowaileet <wa...@gmail.com>
    Reviewed-by: Murtadha Al Hubail <mh...@apache.org>
---
 .../asterix/app/translator/QueryTranslator.java     |  4 ++--
 .../asterix/common/config/StorageProperties.java    |  9 ++++++++-
 .../asterix/lang/common/statement/DatasetDecl.java  | 21 ++++++++++++++-------
 .../asterix/metadata/dataset/DatasetFormatInfo.java |  6 +++++-
 .../apache/asterix/metadata/entities/Dataset.java   |  2 +-
 .../DatasetTupleTranslator.java                     |  4 ++--
 .../DatasetTupleTranslatorTest.java                 |  2 +-
 .../IndexTupleTranslatorTest.java                   |  2 +-
 .../apache/asterix/object/base/AdmObjectNode.java   | 14 ++++++++++++++
 9 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 15a8238a6b..3fa74230bc 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -748,8 +748,8 @@ public class QueryTranslator extends AbstractLangTranslator implements IStatemen
         boolean itemTypeAdded = false, metaItemTypeAdded = false;
 
         StorageProperties storageProperties = metadataProvider.getStorageProperties();
-        DatasetFormatInfo datasetFormatInfo = dd.getDatasetFormatInfo(storageProperties.getColumnMaxTupleCount(),
-                storageProperties.getColumnFreeSpaceTolerance());
+        DatasetFormatInfo datasetFormatInfo = dd.getDatasetFormatInfo(storageProperties.getStorageFormat(),
+                storageProperties.getColumnMaxTupleCount(), storageProperties.getColumnFreeSpaceTolerance());
         try {
             // Check if the dataverse exists
             Dataverse dv = MetadataManager.INSTANCE.getDataverse(mdTxnCtx, dataverseName);
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java
index 5b99fa0e61..073da971f7 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java
@@ -63,7 +63,8 @@ public class StorageProperties extends AbstractProperties {
         STORAGE_GLOBAL_CLEANUP(BOOLEAN, true),
         STORAGE_GLOBAL_CLEANUP_TIMEOUT(POSITIVE_INTEGER, (int) TimeUnit.MINUTES.toSeconds(10)),
         STORAGE_COLUMN_MAX_TUPLE_COUNT(NONNEGATIVE_INTEGER, 15000),
-        STORAGE_COLUMN_FREE_SPACE_TOLERANCE(DOUBLE, 0.15);
+        STORAGE_COLUMN_FREE_SPACE_TOLERANCE(DOUBLE, 0.15),
+        STORAGE_FORMAT(STRING, "row");
 
         private final IOptionType interpreter;
         private final Object defaultValue;
@@ -136,6 +137,8 @@ public class StorageProperties extends AbstractProperties {
                 case STORAGE_COLUMN_FREE_SPACE_TOLERANCE:
                     return "The percentage of the maximum tolerable empty space for a physical mega leaf page (e.g.,"
                             + " 0.15 means a physical page with 15% or less empty space is tolerable)";
+                case STORAGE_FORMAT:
+                    return "The default storage format (either row or column)";
                 default:
                     throw new IllegalStateException("NYI: " + this);
             }
@@ -280,4 +283,8 @@ public class StorageProperties extends AbstractProperties {
     public float getColumnFreeSpaceTolerance() {
         return (float) accessor.getDouble(Option.STORAGE_COLUMN_FREE_SPACE_TOLERANCE);
     }
+
+    public String getStorageFormat() {
+        return accessor.getString(Option.STORAGE_FORMAT); // value of STORAGE_FORMAT; its declared default is "row"
+    }
 }
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
index 8f48db0ff4..b8d1bfdc1d 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
@@ -134,14 +134,21 @@ public class DatasetDecl extends AbstractStatement {
                 .getOptionalString(DatasetDeclParametersUtil.STORAGE_BLOCK_COMPRESSION_SCHEME_PARAMETER_NAME);
     }
 
-    public DatasetFormatInfo getDatasetFormatInfo(int defaultMaxTupleCount, float defaultFreeSpaceTolerance) {
-        final AdmObjectNode datasetFormatNode =
-                (AdmObjectNode) withObjectNode.get(DatasetDeclParametersUtil.DATASET_FORMAT_PARAMETER_NAME);
-        if (datasetType != DatasetType.INTERNAL || datasetFormatNode == null) {
-            return DatasetFormatInfo.DEFAULT;
+    public DatasetFormatInfo getDatasetFormatInfo(String defaultFormat, int defaultMaxTupleCount,
+            float defaultFreeSpaceTolerance) {
+        if (datasetType != DatasetType.INTERNAL) {
+            return DatasetFormatInfo.SYSTEM_DEFAULT;
         }
-        DatasetConfig.DatasetFormat datasetFormat = DatasetConfig.DatasetFormat.getFormat(
-                datasetFormatNode.getOptionalString(DatasetDeclParametersUtil.DATASET_FORMAT_FORMAT_PARAMETER_NAME));
+
+        AdmObjectNode datasetFormatNode = (AdmObjectNode) withObjectNode
+                .getOrDefault(DatasetDeclParametersUtil.DATASET_FORMAT_PARAMETER_NAME, AdmObjectNode.EMPTY);
+        DatasetConfig.DatasetFormat datasetFormat = DatasetConfig.DatasetFormat.getFormat(datasetFormatNode
+                .getOptionalString(DatasetDeclParametersUtil.DATASET_FORMAT_FORMAT_PARAMETER_NAME, defaultFormat));
+
+        if (datasetFormat == DatasetConfig.DatasetFormat.ROW) {
+            return DatasetFormatInfo.SYSTEM_DEFAULT;
+        }
+
         int maxTupleCount = datasetFormatNode.getOptionalInt(
                 DatasetDeclParametersUtil.DATASET_FORMAT_MAX_TUPLE_COUNT_PARAMETER_NAME, defaultMaxTupleCount);
         float freeSpaceTolerance = datasetFormatNode.getOptionalFloat(
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java
index 38951a4e2b..86d923337e 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java
@@ -20,11 +20,15 @@ package org.apache.asterix.metadata.dataset;
 
 import java.io.Serializable;
 
+import org.apache.asterix.common.config.DatasetConfig;
 import org.apache.asterix.common.config.DatasetConfig.DatasetFormat;
 
 public class DatasetFormatInfo implements Serializable {
     private static final long serialVersionUID = 7656132322813253435L;
-    public static final DatasetFormatInfo DEFAULT = new DatasetFormatInfo();
+    /**
+     * System's default format, used for non-{@link DatasetConfig.DatasetType#INTERNAL} and row-format datasets
+     */
+    public static final DatasetFormatInfo SYSTEM_DEFAULT = new DatasetFormatInfo();
     private final DatasetFormat format;
     private final int maxTupleCount;
     private final float freeSpaceTolerance;
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
index a35be40f9c..c0f2dddff4 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
@@ -164,7 +164,7 @@ public class Dataset implements IMetadataEntity<Dataset>, IDataset {
             DatasetType datasetType, int datasetId, int pendingOp) {
         this(dataverseName, datasetName, recordTypeDataverseName, recordTypeName, /*metaTypeDataverseName*/null,
                 /*metaTypeName*/null, nodeGroupName, compactionPolicy, compactionPolicyProperties, datasetDetails,
-                hints, datasetType, datasetId, pendingOp, CompressionManager.NONE, DatasetFormatInfo.DEFAULT);
+                hints, datasetType, datasetId, pendingOp, CompressionManager.NONE, DatasetFormatInfo.SYSTEM_DEFAULT);
     }
 
     public Dataset(DataverseName dataverseName, String datasetName, DataverseName itemTypeDataverseName,
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
index eafa331dcb..790faa5f0c 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
@@ -433,7 +433,7 @@ public class DatasetTupleTranslator extends AbstractTupleTranslator<Dataset> {
         int datasetFormatIndex =
                 datasetType.getFieldIndex(MetadataRecordTypes.DATASET_ARECORD_DATASET_FORMAT_FIELD_NAME);
         if (datasetFormatIndex < 0) {
-            return DatasetFormatInfo.DEFAULT;
+            return DatasetFormatInfo.SYSTEM_DEFAULT;
         }
 
         ARecordType datasetFormatType = (ARecordType) datasetType.getFieldTypes()[datasetFormatIndex];
@@ -676,7 +676,7 @@ public class DatasetTupleTranslator extends AbstractTupleTranslator<Dataset> {
 
     private void writeDatasetFormatInfo(Dataset dataset) throws HyracksDataException {
         DatasetFormatInfo info = dataset.getDatasetFormatInfo();
-        if (DatasetFormatInfo.DEFAULT == info) {
+        if (DatasetFormatInfo.SYSTEM_DEFAULT == info) {
             return;
         }
 
diff --git a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java
index 292ff16d4d..b6f9df766d 100644
--- a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java
+++ b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java
@@ -57,7 +57,7 @@ public class DatasetTupleTranslatorTest {
                     DataverseName.createSinglePartName("foo"), "LogType", DataverseName.createSinglePartName("CB"),
                     "MetaType", "DEFAULT_NG_ALL_NODES", "prefix", compactionPolicyProperties, details,
                     Collections.emptyMap(), DatasetType.INTERNAL, 115, 0, CompressionManager.NONE,
-                    DatasetFormatInfo.DEFAULT);
+                    DatasetFormatInfo.SYSTEM_DEFAULT);
             DatasetTupleTranslator dtTranslator = new DatasetTupleTranslator(true);
             ITupleReference tuple = dtTranslator.getTupleFromMetadataEntity(dataset);
             Dataset deserializedDataset = dtTranslator.getMetadataEntityFromTuple(tuple);
diff --git a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java
index 77c64d736c..9f5447876f 100644
--- a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java
+++ b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java
@@ -70,7 +70,7 @@ public class IndexTupleTranslatorTest {
             DataverseName dvCB = DataverseName.createSinglePartName("CB");
             Dataset dataset = new Dataset(dvTest, "d1", dvFoo, "LogType", dvCB, "MetaType", "DEFAULT_NG_ALL_NODES",
                     "prefix", compactionPolicyProperties, details, Collections.emptyMap(), DatasetType.INTERNAL, 115, 0,
-                    CompressionManager.NONE, DatasetFormatInfo.DEFAULT);
+                    CompressionManager.NONE, DatasetFormatInfo.SYSTEM_DEFAULT);
 
             Index index = new Index(dvTest, "d1", "i1", IndexType.BTREE,
                     Collections.singletonList(Collections.singletonList("row_id")),
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java
index 966b9bae88..bcabb18dd3 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java
@@ -68,6 +68,15 @@ public class AdmObjectNode implements IAdmNode {
         return children.get(fieldName);
     }
 
+    public IAdmNode getOrDefault(String fieldName, IAdmNode defaultValue) {
+        final IAdmNode child = get(fieldName); // null when no child is stored under fieldName
+        return child == null ? defaultValue : child;
+    }
+
+    public IAdmNode getOrEmpty(String fieldName) {
+        return getOrDefault(fieldName, AdmObjectNode.EMPTY); // absent field yields the shared EMPTY node, not null
+    }
+
     public Set<String> getFieldNames() {
         return children.keySet();
     }
@@ -162,6 +171,11 @@ public class AdmObjectNode implements IAdmNode {
         return ((AdmStringNode) node).get();
     }
 
+    public String getOptionalString(String field, String defaultValue) {
+        final String present = getOptionalString(field); // single-argument overload; null means "not set"
+        return present == null ? defaultValue : present;
+    }
+
     public int getOptionalInt(String field, int defaultValue) {
         final IAdmNode node = get(field);
         if (node == null) {