You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by lu...@apache.org on 2017/11/01 04:12:13 UTC

asterixdb git commit: [ASTERIXDB-2133] Fix unncessary binary search in GroupFrameAccessor

Repository: asterixdb
Updated Branches:
  refs/heads/master a22ca7bf8 -> c04046c11


[ASTERIXDB-2133] Fix unncessary binary search in GroupFrameAccessor

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- GroupFrameAccessor holds a list of frames from a run during the merge
step of merge sort. However, everytime we access a tuple, it performs
binary search to get the physical tuple index. This patch fixes this
by remembering the last accessed frame. It is expected that tuples
are accessed sequentially (since it's the merge step), which greatly
reduces binary searches

Change-Id: I4a1b19ad47f6b1dda4bd5c417932e4c9ba36a714
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2079
Sonar-Qube: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <je...@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Ian Maxon <im...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/c04046c1
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/c04046c1
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/c04046c1

Branch: refs/heads/master
Commit: c04046c11be062c4320ba7e0bbd5258ffb600ad3
Parents: a22ca7b
Author: luochen01 <cl...@uci.edu>
Authored: Fri Oct 27 09:14:19 2017 -0700
Committer: Luo Chen <cl...@uci.edu>
Committed: Tue Oct 31 21:11:25 2017 -0700

----------------------------------------------------------------------
 .../std/sort/util/GroupFrameAccessor.java       | 29 ++++++++++++--------
 1 file changed, 18 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c04046c1/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
index cac50e7..bf61435 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
@@ -57,10 +57,13 @@ public class GroupFrameAccessor implements IFrameTupleAccessor {
     private final RecordDescriptor recordDescriptor;
     private final int minFrameSize;
     private final FrameTupleAccessor frameTupleAccessor;
-    private int lastTupleIndex;
     private int lastFrameId;
+    // the start tuple index of the last accessed frame (inclusive)
+    private int lastFrameStart;
+    // the end tuple index of the last accessed frame (exclusive)
+    private int lastFrameEnd;
     private ByteBuffer buffer;
-    private List<InnerFrameInfo> innerFrameInfos;
+    private final List<InnerFrameInfo> innerFrameInfos;
 
     public GroupFrameAccessor(int minFrameSize, RecordDescriptor recordDescriptor) {
         this.minFrameSize = minFrameSize;
@@ -127,8 +130,9 @@ public class GroupFrameAccessor implements IFrameTupleAccessor {
     @Override
     public void reset(ByteBuffer buffer) {
         this.buffer = buffer;
-        this.lastTupleIndex = -1;
         this.lastFrameId = -1;
+        this.lastFrameStart = -1;
+        this.lastFrameEnd = -1;
         parseGroupedBuffer(0, buffer.limit());
     }
 
@@ -153,12 +157,13 @@ public class GroupFrameAccessor implements IFrameTupleAccessor {
 
     private int resetSubTupleAccessor(int tupleIndex) {
         assert tupleIndex < getTupleCount();
-        if (innerFrameInfos.size() == 1) {
-            return tupleIndex;
-        }
-        if (tupleIndex == lastTupleIndex) {
-            return lastFrameId > 0 ? lastTupleIndex - innerFrameInfos.get(lastFrameId - 1).tupleCount : lastTupleIndex;
+        if (tupleIndex >= lastFrameStart && tupleIndex < lastFrameEnd) {
+            // a special optimization path
+            // since GroupFrameAccessor is used by merge, it is expected that tuples are accessed sequentially
+            // thus, if tuple still fit into the last frame, we do not need to perform binary search
+            return tupleIndex - lastFrameStart;
         }
+        // we perform binary search to get the frame Id
         int subFrameId = Collections.binarySearch(innerFrameInfos, tupleIndex);
         if (subFrameId >= 0) {
             subFrameId++;
@@ -166,9 +171,11 @@ public class GroupFrameAccessor implements IFrameTupleAccessor {
             subFrameId = -subFrameId - 1;
         }
         frameTupleAccessor.reset(buffer, innerFrameInfos.get(subFrameId).start, innerFrameInfos.get(subFrameId).length);
-        lastTupleIndex = tupleIndex;
         lastFrameId = subFrameId;
-        return lastFrameId > 0 ? lastTupleIndex - innerFrameInfos.get(lastFrameId - 1).tupleCount : lastTupleIndex;
+        lastFrameStart = lastFrameId > 0 ? innerFrameInfos.get(lastFrameId - 1).tupleCount : 0;
+        lastFrameEnd = innerFrameInfos.get(lastFrameId).tupleCount;
+
+        return tupleIndex - lastFrameStart;
     }
 
-}
+}
\ No newline at end of file