You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mh...@apache.org on 2020/11/04 22:39:55 UTC

[asterixdb] branch master updated (b317f37 -> 01a301d)

This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git.


    from b317f37  Merge commit '95fa33c'
     new b1ae88e  [NO ISSUE][STO] Log bulkloader state on failure
     new c402e3a  [NO ISSUE][DOC] Document hash and hash-bcast hints
     new 01a301d  Merge remote-tracking branch 'gerrit/mad-hatter'

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 asterixdb/asterix-doc/pom.xml                      |  2 +-
 .../asterix-doc/src/main/markdown/sqlpp/0_toc.md   |  1 +
 .../src/main/markdown/sqlpp/appendix_2_hints.md    | 52 ++++++++++++++++++++++
 .../hyracks/hyracks-storage-am-btree/pom.xml       | 13 ++++++
 .../hyracks/storage/am/btree/impls/BTree.java      | 26 ++++++++++-
 .../hyracks/hyracks-storage-am-common/pom.xml      |  4 ++
 .../storage/am/common/api/ITreeIndexFrame.java     | 13 ++++++
 .../am/common/frames/TreeIndexNSMFrame.java        | 20 +++++++++
 8 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100644 asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_hints.md


[asterixdb] 01/03: [NO ISSUE][STO] Log bulkloader state on failure

Posted by mh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit b1ae88e7e6bae81b6e9de5932ea16c8ffd077320
Author: Murtadha Hubail <mh...@apache.org>
AuthorDate: Sat Oct 31 23:35:56 2020 +0300

    [NO ISSUE][STO] Log bulkloader state on failure
    
    - user model changes: no
    - storage format changes: no
    - interface changes: yes
    
    Details:
    
    - Log BTree bulkloader state when an exception is encountered.
    
    Change-Id: I00a69d4883ae7a69c28a201408504049dd70ffb5
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/8683
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Ali Alsuliman <al...@gmail.com>
---
 .../hyracks/hyracks-storage-am-btree/pom.xml       | 13 +++++++++++
 .../hyracks/storage/am/btree/impls/BTree.java      | 26 +++++++++++++++++++++-
 .../hyracks/hyracks-storage-am-common/pom.xml      |  4 ++++
 .../storage/am/common/api/ITreeIndexFrame.java     | 13 +++++++++++
 .../am/common/frames/TreeIndexNSMFrame.java        | 20 +++++++++++++++++
 5 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/pom.xml b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/pom.xml
index fa74184..45330e8 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/pom.xml
@@ -107,5 +107,18 @@
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-lang3</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hyracks</groupId>
+      <artifactId>hyracks-util</artifactId>
+      <version>${project.version}</version>
+    </dependency>
   </dependencies>
 </project>
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
index 3a062af..f62f2d1 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
@@ -69,11 +69,16 @@ import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
 import org.apache.hyracks.storage.common.buffercache.IPageWriteCallback;
 import org.apache.hyracks.storage.common.file.BufferedFileHandle;
+import org.apache.hyracks.util.JSONUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
 
 public class BTree extends AbstractTreeIndex {
 
     public static final float DEFAULT_FILL_FACTOR = 0.7f;
-
+    private static final Logger LOGGER = LogManager.getLogger();
     private static final long RESTART_OP = Long.MIN_VALUE;
     private static final long FULL_RESTART_OP = Long.MIN_VALUE + 1;
     private static final int MAX_RESTARTS = 10;
@@ -1086,6 +1091,7 @@ public class BTree extends AbstractTreeIndex {
                 }
                 ((IBTreeLeafFrame) leafFrame).insertSorted(tuple);
             } catch (HyracksDataException | RuntimeException e) {
+                logState(tuple, e);
                 handleException();
                 throw e;
             }
@@ -1194,6 +1200,24 @@ public class BTree extends AbstractTreeIndex {
         public void abort() throws HyracksDataException {
             super.handleException();
         }
+
+        private void logState(ITupleReference tuple, Exception e) {
+            try {
+                ObjectNode state = JSONUtil.createObject();
+                state.set("leafFrame", leafFrame.getState());
+                state.set("interiorFrame", interiorFrame.getState());
+                int tupleSize = Math.max(leafFrame.getBytesRequiredToWriteTuple(tuple),
+                        interiorFrame.getBytesRequiredToWriteTuple(tuple));
+                state.put("tupleSize", tupleSize);
+                state.put("spaceNeeded", tupleWriter.bytesRequired(tuple) + slotSize);
+                state.put("spaceUsed", leafFrame.getBuffer().capacity() - leafFrame.getTotalFreeSpace());
+                state.put("leafMaxBytes", leafMaxBytes);
+                state.put("maxTupleSize", maxTupleSize);
+                LOGGER.error("failed to add tuple {}", state, e);
+            } catch (Throwable t) {
+                e.addSuppressed(t);
+            }
+        }
     }
 
     @SuppressWarnings("rawtypes")
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/pom.xml b/hyracks-fullstack/hyracks/hyracks-storage-am-common/pom.xml
index 31463b7..703deb8 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/pom.xml
@@ -109,5 +109,9 @@
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-databind</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-core</artifactId>
+    </dependency>
   </dependencies>
 </project>
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
index dc59612..18d5653 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
@@ -28,6 +28,9 @@ import org.apache.hyracks.storage.common.MultiComparator;
 import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
 import org.apache.hyracks.storage.common.buffercache.IExtraPageBlockHelper;
+import org.apache.hyracks.util.JSONUtil;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
 
 public interface ITreeIndexFrame {
 
@@ -122,4 +125,14 @@ public interface ITreeIndexFrame {
     public ITupleReference getLeftmostTuple() throws HyracksDataException;
 
     public ITupleReference getRightmostTuple() throws HyracksDataException;
+
+    default ObjectNode getState() {
+        ObjectNode json = JSONUtil.createObject();
+        json.put("tupleCount", getTupleCount());
+        json.put("freeSpaceOff", getFreeSpaceOff());
+        json.put("level", getLevel());
+        json.put("pageLsn", getPageLsn());
+        json.put("totalFreeSpace", getTotalFreeSpace());
+        return json;
+    }
 }
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
index 6106358..08d4564 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
@@ -31,6 +31,10 @@ import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
 import org.apache.hyracks.storage.am.common.ophelpers.SlotOffTupleOff;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
+import org.apache.hyracks.util.JSONUtil;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 
 public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
 
@@ -354,4 +358,20 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
             return frameTuple;
         }
     }
+
+    @Override
+    public ObjectNode getState() {
+        ObjectNode state = ITreeIndexFrame.super.getState();
+        state.put("largeFlag", getLargeFlag());
+        return state;
+    }
+
+    @Override
+    public String toString() {
+        try {
+            return JSONUtil.convertNode(getState());
+        } catch (JsonProcessingException e) {
+            return "failed to convert json";
+        }
+    }
 }


[asterixdb] 03/03: Merge remote-tracking branch 'gerrit/mad-hatter'

Posted by mh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 01a301d975f37945dfe23a4968263b435a0763c1
Merge: b317f37 c402e3a
Author: Michael Blow <mb...@apache.org>
AuthorDate: Tue Nov 3 09:32:38 2020 -0500

    Merge remote-tracking branch 'gerrit/mad-hatter'
    
    Change-Id: Ia1402644a9ca2878c493293e9dcb92176d589736

 asterixdb/asterix-doc/pom.xml                      |  2 +-
 .../asterix-doc/src/main/markdown/sqlpp/0_toc.md   |  1 +
 .../src/main/markdown/sqlpp/appendix_2_hints.md    | 52 ++++++++++++++++++++++
 .../hyracks/hyracks-storage-am-btree/pom.xml       | 13 ++++++
 .../hyracks/storage/am/btree/impls/BTree.java      | 26 ++++++++++-
 .../hyracks/hyracks-storage-am-common/pom.xml      |  4 ++
 .../storage/am/common/api/ITreeIndexFrame.java     | 13 ++++++
 .../am/common/frames/TreeIndexNSMFrame.java        | 20 +++++++++
 8 files changed, 129 insertions(+), 2 deletions(-)

diff --cc asterixdb/asterix-doc/pom.xml
index 98e86b3,e1a1e69..88cc8cf
--- a/asterixdb/asterix-doc/pom.xml
+++ b/asterixdb/asterix-doc/pom.xml
@@@ -52,7 -52,7 +52,7 @@@
              <configuration>
                <target>
                  <concat destfile="${project.build.directory}/generated-site/markdown/sqlpp/manual.md">
-                   <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_declare_dataverse.md,3_declare_function.md,3_query.md,4_error_title.md,4_error.md,5_ddl_head.md,5_ddl_dataset_index.md,5_ddl_function_removal.md,5_ddl_dml.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md,appendix_2_parallel_sort.md,appendix_2_index_only.md,appendix_2_interval_joins.md,appendix_3_tit [...]
 -                  <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_declare_dataverse.md,3_declare_function.md,3_query.md,4_error_title.md,4_error.md,5_ddl_head.md,5_ddl_dataset_index.md,5_ddl_function_removal.md,5_ddl_dml.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md,appendix_2_parallel_sort.md,appendix_2_index_only.md,appendix_2_hints.md,appendix_3_title.md,app [...]
++                  <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_declare_dataverse.md,3_declare_function.md,3_query.md,4_error_title.md,4_error.md,5_ddl_head.md,5_ddl_dataset_index.md,5_ddl_function_removal.md,5_ddl_dml.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md,appendix_2_parallel_sort.md,appendix_2_index_only.md,appendix_2_hints.md,appendix_2_interval_joi [...]
                  </concat>
                  <concat destfile="${project.build.directory}/generated-site/markdown/sqlpp/builtins.md">
                    <filelist dir="${project.basedir}/src/main/markdown/builtins" files="0_toc.md,0_toc_sqlpp.md,0_toc_common.md,1_numeric_common.md,1_numeric_delta.md,2_string_common.md,2_string_delta.md,3_binary.md,4_spatial.md,5_similarity.md,6_tokenizing.md,7_temporal.md,7_allens.md,8_record.md,9_aggregate_sql.md,10_comparison.md,11_type.md,13_conditional.md,12_misc.md,15_bitwise.md,14_window.md" />
diff --cc asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
index f4e2cef,8680c43..ca1ca19
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
@@@ -99,6 -98,6 +99,7 @@@
  * [Appendix 1. Reserved Keywords](#Reserved_keywords)
  * [Appendix 2. Performance Tuning](#Performance_tuning)
        * [Parallelism Parameter](#Parallelism_parameter)
 +      * [Interval Joins](#Interval_joins)
        * [Memory Parameters](#Memory_parameters)
+       * [Query Hints](#Query_hints)
  * [Appendix 3. Variable Bindings and Name Resolution](#Variable_bindings_and_name_resolution)


[asterixdb] 02/03: [NO ISSUE][DOC] Document hash and hash-bcast hints

Posted by mh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit c402e3a79f6320c72898424745fdab1e4da57405
Author: Dmitry Lychagin <dm...@couchbase.com>
AuthorDate: Fri Oct 30 11:01:33 2020 -0700

    [NO ISSUE][DOC] Document hash and hash-bcast hints
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - Add documentation for hash and hash-bcast hints
    
    Change-Id: If36f35c994c05cb244d5c4a08fc75680d1251920
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/8604
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Dmitry Lychagin <dm...@couchbase.com>
    Reviewed-by: Till Westmann <ti...@apache.org>
---
 asterixdb/asterix-doc/pom.xml                      |  2 +-
 .../asterix-doc/src/main/markdown/sqlpp/0_toc.md   |  1 +
 .../src/main/markdown/sqlpp/appendix_2_hints.md    | 52 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml
index ea23f5e..e1a1e69 100644
--- a/asterixdb/asterix-doc/pom.xml
+++ b/asterixdb/asterix-doc/pom.xml
@@ -52,7 +52,7 @@
             <configuration>
               <target>
                 <concat destfile="${project.build.directory}/generated-site/markdown/sqlpp/manual.md">
-                  <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_declare_dataverse.md,3_declare_function.md,3_query.md,4_error_title.md,4_error.md,5_ddl_head.md,5_ddl_dataset_index.md,5_ddl_function_removal.md,5_ddl_dml.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md,appendix_2_parallel_sort.md,appendix_2_index_only.md,appendix_3_title.md,appendix_3_resolution.md" />
+                  <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_declare_dataverse.md,3_declare_function.md,3_query.md,4_error_title.md,4_error.md,5_ddl_head.md,5_ddl_dataset_index.md,5_ddl_function_removal.md,5_ddl_dml.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md,appendix_2_parallel_sort.md,appendix_2_index_only.md,appendix_2_hints.md,appendix_3_title.md,appe [...]
                 </concat>
                 <concat destfile="${project.build.directory}/generated-site/markdown/sqlpp/builtins.md">
                   <filelist dir="${project.basedir}/src/main/markdown/builtins" files="0_toc.md,0_toc_sqlpp.md,0_toc_common.md,1_numeric_common.md,1_numeric_delta.md,2_string_common.md,2_string_delta.md,3_binary.md,4_spatial.md,5_similarity.md,6_tokenizing.md,7_temporal.md,7_allens.md,8_record.md,9_aggregate_sql.md,10_comparison.md,11_type.md,13_conditional.md,12_misc.md,15_bitwise.md,14_window.md" />
diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
index 1a5a3e6..8680c43 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
@@ -99,4 +99,5 @@
 * [Appendix 2. Performance Tuning](#Performance_tuning)
       * [Parallelism Parameter](#Parallelism_parameter)
       * [Memory Parameters](#Memory_parameters)
+      * [Query Hints](#Query_hints)
 * [Appendix 3. Variable Bindings and Name Resolution](#Variable_bindings_and_name_resolution)
diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_hints.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_hints.md
new file mode 100644
index 0000000..0e4f470
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_hints.md
@@ -0,0 +1,52 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+## <a id="Query_hints">Query Hints</a>
+
+#### <a id="hash_groupby">"hash" GROUP BY hint</a>
+
+The system supports two algorithms for GROUP BY clause evaluation: pre-sorted and hash-based.
+By default it uses the pre-sorted approach: The input data is first sorted on the grouping fields
+and then aggregation is performed on that sorted data. The alternative is a hash-based strategy
+which can be enabled via a `/*+ hash */` GROUP BY hint: The data is aggregated using an in-memory hash table
+(that can spill to disk if necessary). This approach is recommended for low-cardinality grouping fields.
+
+##### Example:
+
+    SELECT c.address.state, count(*)
+    FROM Customers AS c
+    /*+ hash */ GROUP BY c.address.state
+
+#### <a id="hash_bcast_join">"hash-bcast" JOIN hint</a>
+
+By default the system uses a partitioned-parallel hash join strategy to parallelize the execution of an
+equi-join. In this approach both sides of the join are repartitioned (if necessary) on a hash of the join key;
+potentially matching data items thus arrive at the same partition to be joined locally.
+This strategy is robust, but not always the fastest when one of the join sides has low cardinality and
+the other has high cardinality (since it scans and potentially moves the data from both sides).
+This special case can be better handled by broadcasting (replicating) the smaller side to all data partitions
+of the larger side and not moving the data from the other (larger) side. The system provides a join hint to enable
+this strategy: `/*+ hash-bcast */`. This hint forces the right side of the join to be replicated while the left side
+retains its original partitioning.
+
+##### Example:
+
+    SELECT *
+    FROM Orders AS o JOIN Customers AS c
+    ON o.customer_id /*+ hash-bcast */ = c.customer_id