You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by af...@apache.org on 2012/06/27 23:41:32 UTC

svn commit: r1354746 - in /accumulo/branches/ACCUMULO-652/core/src: main/java/org/apache/accumulo/core/file/rfile/ test/java/org/apache/accumulo/core/file/rfile/

Author: afuchs
Date: Wed Jun 27 21:41:31 2012
New Revision: 1354746

URL: http://svn.apache.org/viewvc?rev=1354746&view=rev
Log:
ACCUMULO-652 limited size of minimumVisibility and consolidated code for block stats

Modified:
    accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
    accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
    accumulo/branches/ACCUMULO-652/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java

Modified: accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
URL: http://svn.apache.org/viewvc/accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java?rev=1354746&r1=1354745&r2=1354746&view=diff
==============================================================================
--- accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java (original)
+++ accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java Wed Jun 27 21:41:31 2012
@@ -47,21 +47,15 @@ public class MultiLevelIndex {
   
   public static class IndexEntry implements WritableComparable<IndexEntry> {
     private Key key;
-    private long minTimestamp;
-    private long maxTimestamp;
-    private ColumnVisibility minimumVisibility = null;
-    private int entries;
+    private BlockStats blockStats;
     private long offset;
     private long compressedSize;
     private long rawSize;
     private int format;
     
-    IndexEntry(Key k, long minTimestamp, long maxTimestamp, ColumnVisibility minimumVisibility, int e, long offset, long compressedSize, long rawSize, int version) {
+    IndexEntry(Key k, BlockStats blockStats, long offset, long compressedSize, long rawSize, int version) {
       this.key = k;
-      this.minTimestamp = minTimestamp;
-      this.maxTimestamp = maxTimestamp;
-      this.minimumVisibility = minimumVisibility;
-      this.entries = e;
+      this.blockStats = blockStats;
       this.offset = offset;
       this.compressedSize = compressedSize;
       this.rawSize = rawSize;
@@ -76,20 +70,8 @@ public class MultiLevelIndex {
     public void readFields(DataInput in) throws IOException {
       key = new Key();
       key.readFields(in);
-      if(format == RFile.RINDEX_VER_7)
-      {
-        minTimestamp = in.readLong();
-        maxTimestamp = in.readLong();
-        byte[] visibility = new byte[in.readInt()];
-        in.readFully(visibility);
-        minimumVisibility = new ColumnVisibility(visibility);
-      }
-      else
-      {
-        minTimestamp = Long.MIN_VALUE;
-        maxTimestamp = Long.MAX_VALUE;
-      }
-      entries = in.readInt();
+      blockStats = new BlockStats(format);
+      blockStats.readFields(in);
       if (format == RFile.RINDEX_VER_6 || format == RFile.RINDEX_VER_7) {
         offset = Utils.readVLong(in);
         compressedSize = Utils.readVLong(in);
@@ -104,15 +86,7 @@ public class MultiLevelIndex {
     @Override
     public void write(DataOutput out) throws IOException {
       key.write(out);
-      if(format == RFile.RINDEX_VER_7)
-      {
-        out.writeLong(minTimestamp);
-        out.writeLong(maxTimestamp);
-        byte[] visibility = minimumVisibility.getExpression();
-        out.writeInt(visibility.length);
-        out.write(visibility);
-      }
-      out.writeInt(entries);
+      blockStats.write(out);
       if (format == RFile.RINDEX_VER_6 || format == RFile.RINDEX_VER_7) {
         Utils.writeVLong(out, offset);
         Utils.writeVLong(out, compressedSize);
@@ -125,7 +99,7 @@ public class MultiLevelIndex {
     }
     
     public int getNumEntries() {
-      return entries;
+      return blockStats.entries;
     }
     
     public long getOffset() {
@@ -233,9 +207,7 @@ public class MultiLevelIndex {
     private ByteArrayOutputStream indexBytes;
     private DataOutputStream indexOut;
     
-    private long minTimestamp = Long.MAX_VALUE;
-    private long maxTimestamp = Long.MIN_VALUE;
-    private ColumnVisibility minimumVisibility = null;
+    private BlockStats blockStats = new BlockStats();
     
     private ArrayList<Integer> offsets;
     private int level;
@@ -256,17 +228,10 @@ public class MultiLevelIndex {
     
     public IndexBlock() {}
     
-    public void add(Key key, long minTimestamp, long maxTimestamp, ColumnVisibility minimumVisibility, int value, long offset, long compressedSize, long rawSize, int version) throws IOException {
+    public void add(Key key, BlockStats blockStats, long offset, long compressedSize, long rawSize, int version) throws IOException {
       offsets.add(indexOut.size());
-      if (this.minTimestamp > minTimestamp)
-        this.minTimestamp = minTimestamp;
-      if (this.maxTimestamp < maxTimestamp)
-        this.maxTimestamp = maxTimestamp;
-      if(this.minimumVisibility == null)
-        this.minimumVisibility = minimumVisibility;
-      else
-        this.minimumVisibility = this.minimumVisibility.or(minimumVisibility);
-      new IndexEntry(key, minTimestamp, maxTimestamp, minimumVisibility, value, offset, compressedSize, rawSize, version).write(indexOut);
+      this.blockStats.updateBlockStats(blockStats);
+      new IndexEntry(key, blockStats, offset, compressedSize, rawSize, version).write(indexOut);
     }
     
     int getSize() {
@@ -414,7 +379,7 @@ public class MultiLevelIndex {
       IndexEntry ie = new IndexEntry(version);
       for (int i = 0; i < buffered; i++) {
         ie.readFields(dis);
-        writer.add(ie.getKey(), ie.minTimestamp, ie.maxTimestamp, ie.minimumVisibility, ie.getNumEntries(), ie.getOffset(), ie.getCompressedSize(), ie.getRawSize(), ie.format);
+        writer.add(ie.getKey(), ie.blockStats, ie.getOffset(), ie.getCompressedSize(), ie.getRawSize(), ie.format);
       }
       
       buffered = 0;
@@ -423,18 +388,18 @@ public class MultiLevelIndex {
       
     }
     
-    public void add(Key key, long minTimestamp, long maxTimestamp, ColumnVisibility minimumVisibility, int data, long offset, long compressedSize, long rawSize, int version) throws IOException {
+    public void add(Key key, BlockStats blockStats, long offset, long compressedSize, long rawSize, int version) throws IOException {
       if (buffer.size() > (10 * 1 << 20)) {
         flush();
       }
       
-      new IndexEntry(key, minTimestamp, maxTimestamp, minimumVisibility, data, offset, compressedSize, rawSize, version).write(buffer);
+      new IndexEntry(key, blockStats, offset, compressedSize, rawSize, version).write(buffer);
       buffered++;
     }
     
-    public void addLast(Key key, long minTimestamp, long maxTimestamp, ColumnVisibility minimumVisibility, int data, long offset, long compressedSize, long rawSize, int version) throws IOException {
+    public void addLast(Key key, BlockStats blockStats, long offset, long compressedSize, long rawSize, int version) throws IOException {
       flush();
-      writer.addLast(key, minTimestamp, maxTimestamp, minimumVisibility, data, offset, compressedSize, rawSize, version);
+      writer.addLast(key, blockStats, offset, compressedSize, rawSize, version);
     }
     
     public void close(DataOutput out) throws IOException {
@@ -459,7 +424,7 @@ public class MultiLevelIndex {
       levels = new ArrayList<IndexBlock>();
     }
     
-    private void add(int level, Key key, long minTimestamp, long maxTimestamp, ColumnVisibility minimumVisibility, int data, long offset, long compressedSize, long rawSize, boolean last, int version)
+    private void add(int level, Key key, BlockStats blockStats, long offset, long compressedSize, long rawSize, boolean last, int version)
         throws IOException {
       if (level == levels.size()) {
         levels.add(new IndexBlock(level, 0));
@@ -467,7 +432,7 @@ public class MultiLevelIndex {
       
       IndexBlock iblock = levels.get(level);
       
-      iblock.add(key, minTimestamp, maxTimestamp, minimumVisibility, data, offset, compressedSize, rawSize, version);
+      iblock.add(key, blockStats, offset, compressedSize, rawSize, version);
       
       if (last && level == levels.size() - 1)
         return;
@@ -478,7 +443,7 @@ public class MultiLevelIndex {
         iblock.write(out);
         out.close();
         
-        add(level + 1, key, iblock.minTimestamp, iblock.maxTimestamp, iblock.minimumVisibility, 0, out.getStartPos(), out.getCompressedSize(), out.getRawSize(), last, version);
+        add(level + 1, key, blockStats, out.getStartPos(), out.getCompressedSize(), out.getRawSize(), last, version);
         
         if (last)
           levels.set(level, null);
@@ -487,17 +452,17 @@ public class MultiLevelIndex {
       }
     }
     
-    public void add(Key key, long minTimestamp, long maxTimestamp, ColumnVisibility minimumVisibility, int data, long offset, long compressedSize, long rawSize, int version) throws IOException {
+    public void add(Key key, BlockStats blockStats, long offset, long compressedSize, long rawSize, int version) throws IOException {
       totalAdded++;
-      add(0, key, minTimestamp, maxTimestamp, minimumVisibility, data, offset, compressedSize, rawSize, false, version);
+      add(0, key, blockStats, offset, compressedSize, rawSize, false, version);
     }
     
-    public void addLast(Key key, long minTimestamp, long maxTimestamp, ColumnVisibility minimumVisibility, int data, long offset, long compressedSize, long rawSize, int version) throws IOException {
+    public void addLast(Key key, BlockStats blockStats, long offset, long compressedSize, long rawSize, int version) throws IOException {
       if (addedLast)
         throw new IllegalStateException("already added last");
       
       totalAdded++;
-      add(0, key, minTimestamp, maxTimestamp, minimumVisibility, data, offset, compressedSize, rawSize, true, version);
+      add(0, key, blockStats, offset, compressedSize, rawSize, true, version);
       addedLast = true;
       
     }
@@ -548,7 +513,7 @@ public class MultiLevelIndex {
       
       private final boolean checkFilterIndexEntry(IndexEntry ie) {
         if(timestampFilter == null)
-        if (timestampFilter != null && (ie.maxTimestamp < timestampFilter.startTimestamp || ie.minTimestamp > timestampFilter.endTimestamp)) {
+        if (timestampFilter != null && (ie.blockStats.maxTimestamp < timestampFilter.startTimestamp || ie.blockStats.minTimestamp > timestampFilter.endTimestamp)) {
           return false;
         }
         return true;

Modified: accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
URL: http://svn.apache.org/viewvc/accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java?rev=1354746&r1=1354745&r2=1354746&view=diff
==============================================================================
--- accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java (original)
+++ accumulo/branches/ACCUMULO-652/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java Wed Jun 27 21:41:31 2012
@@ -302,15 +302,12 @@ public class RFile {
     private ABlockWriter blockWriter;
     
     // private BlockAppender blockAppender;
-    private long blockSize = 100000;
-    private int indexBlockSize;
-    private int entries = 0;
-    
+    private final long blockSize;
+    private final int indexBlockSize;
+
     // some aggregate stats to keep on a per-block basis
-    private long minTimestamp = Long.MAX_VALUE;
-    private long maxTimestamp = Long.MIN_VALUE;
-    private ColumnVisibility minimumVisibility = null;
-    
+    private BlockStats blockStats = new BlockStats();
+        
     private ArrayList<LocalityGroupMetadata> localityGroups = new ArrayList<LocalityGroupMetadata>();
     private LocalityGroupMetadata currentLocalityGroup = null;
     private int nextBlock = 0;
@@ -378,27 +375,7 @@ public class RFile {
       }
     }
     
-    private void updateBlockStats(Key key, Value value)
-    {
-      if(minTimestamp > key.getTimestamp())
-        minTimestamp = key.getTimestamp();
-      if(maxTimestamp < key.getTimestamp())
-        maxTimestamp = key.getTimestamp();
-      if(minimumVisibility == null)
-        minimumVisibility = new ColumnVisibility(key.getColumnVisibility());
-      else
-        minimumVisibility = minimumVisibility.or(new ColumnVisibility(key.getColumnVisibility()));
-      entries++;
-    }
-    
-    private void clearBlockStats()
-    {
-      minTimestamp = Long.MAX_VALUE;
-      maxTimestamp = Long.MIN_VALUE;
-      minimumVisibility = null;      
-      entries = 0;
-    }
-    
+
     public void append(Key key, Value value) throws IOException {
       if (dataClosed) {
         throw new IllegalStateException("Cannont append, data closed");
@@ -425,7 +402,7 @@ public class RFile {
       
       rk.write(blockWriter);
       value.write(blockWriter);
-      updateBlockStats(key,value);
+      blockStats.updateBlockStats(key,value);
       
       
       prevKey = new Key(key);
@@ -437,11 +414,11 @@ public class RFile {
       blockWriter.close();
       
       if (lastBlock)
-        currentLocalityGroup.indexWriter.addLast(key, minTimestamp, maxTimestamp, minimumVisibility, entries, blockWriter.getStartPos(), blockWriter.getCompressedSize(), blockWriter.getRawSize(), RINDEX_VER_7);
+        currentLocalityGroup.indexWriter.addLast(key, blockStats, blockWriter.getStartPos(), blockWriter.getCompressedSize(), blockWriter.getRawSize(), RINDEX_VER_7);
       else
-        currentLocalityGroup.indexWriter.add(key, minTimestamp, maxTimestamp, minimumVisibility, entries, blockWriter.getStartPos(), blockWriter.getCompressedSize(), blockWriter.getRawSize(), RINDEX_VER_7);
+        currentLocalityGroup.indexWriter.add(key, blockStats, blockWriter.getStartPos(), blockWriter.getCompressedSize(), blockWriter.getRawSize(), RINDEX_VER_7);
       
-      clearBlockStats();
+      blockStats = new BlockStats();
       blockWriter = null;
       lastKeyInBlock = null;
       nextBlock++;

Modified: accumulo/branches/ACCUMULO-652/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java
URL: http://svn.apache.org/viewvc/accumulo/branches/ACCUMULO-652/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java?rev=1354746&r1=1354745&r2=1354746&view=diff
==============================================================================
--- accumulo/branches/ACCUMULO-652/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java (original)
+++ accumulo/branches/ACCUMULO-652/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java Wed Jun 27 21:41:31 2012
@@ -59,9 +59,10 @@ public class MultiLevelIndexTest extends
     BufferedWriter mliw = new BufferedWriter(new Writer(_cbw, maxBlockSize));
     
     for (int i = 0; i < num; i++)
-      mliw.add(new Key(String.format("%05d000", i)), 0l, 0l, new ColumnVisibility(), i, 0, 0, 0, RFile.RINDEX_VER_7);
-    
-    mliw.addLast(new Key(String.format("%05d000", num)), 0l, 0l, new ColumnVisibility(), num, 0, 0, 0, RFile.RINDEX_VER_7);
+    {
+      mliw.add(new Key(String.format("%05d000", i)), new BlockStats(0,0,new ColumnVisibility(),i), 0, 0, 0, RFile.RINDEX_VER_7);
+    }
+    mliw.addLast(new Key(String.format("%05d000", num)), new BlockStats(0,0,new ColumnVisibility(),num), 0, 0, 0, RFile.RINDEX_VER_7);
     
     ABlockWriter root = _cbw.prepareMetaBlock("root");
     mliw.close(root);