You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by el...@apache.org on 2014/09/12 09:46:30 UTC

[01/10] git commit: ACCUMULO-1628 Fixes issue after previous changes which interrupted a deep-copy

Repository: accumulo
Updated Branches:
  refs/heads/1.5.2-SNAPSHOT c335fca72 -> 7699e1f43
  refs/heads/1.6.1-SNAPSHOT 0d76cd520 -> d54e0fd86
  refs/heads/master abf966e19 -> 6b5275e1b


ACCUMULO-1628 Fixes issue after previous changes which interrupted a deep-copy

Pushes the interrupt flag from the SourceSwitchingIterator down to the
FileManager and InMemoryMap. This should avoid passing the interrupt
flag into a deep copy, which isn't supported. Adds some more tests
that reproduce the edge case which is now fixed.

Signed-off-by: Josh Elser <el...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/7699e1f4
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/7699e1f4
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/7699e1f4

Branch: refs/heads/1.5.2-SNAPSHOT
Commit: 7699e1f43c4ee51bfa4be1e9e73ea722f934a3d6
Parents: c335fca
Author: Keith Turner <kt...@apache.org>
Authored: Thu Sep 11 16:31:08 2014 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Thu Sep 11 16:31:08 2014 -0700

----------------------------------------------------------------------
 .../system/SourceSwitchingIterator.java         | 20 ++++-------
 .../system/SourceSwitchingIteratorTest.java     | 38 +++++++++++++++++++-
 .../server/tabletserver/FileManager.java        | 13 +++++++
 .../server/tabletserver/InMemoryMap.java        | 21 ++++++++---
 .../accumulo/server/tabletserver/Tablet.java    |  5 +++
 .../server/tabletserver/InMemoryMapTest.java    | 38 ++++++++++++++++----
 6 files changed, 110 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
index 33d0ebf..6c40176 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
@@ -47,6 +47,8 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     DataSource getDeepCopyDataSource(IteratorEnvironment env);
 
     SortedKeyValueIterator<Key,Value> iterator() throws IOException;
+
+    void setInterruptFlag(AtomicBoolean flag);
   }
 
   private DataSource source;
@@ -60,20 +62,18 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
   private Collection<ByteSequence> columnFamilies;
 
   private boolean onlySwitchAfterRow;
-  private AtomicBoolean iflag;
 
   private final List<SourceSwitchingIterator> copies;
 
-  private SourceSwitchingIterator(DataSource source, boolean onlySwitchAfterRow, List<SourceSwitchingIterator> copies, AtomicBoolean iflag) {
+  private SourceSwitchingIterator(DataSource source, boolean onlySwitchAfterRow, List<SourceSwitchingIterator> copies) {
     this.source = source;
     this.onlySwitchAfterRow = onlySwitchAfterRow;
     this.copies = copies;
-    this.iflag = iflag;
     copies.add(this);
   }
 
   public SourceSwitchingIterator(DataSource source, boolean onlySwitchAfterRow) {
-    this(source, onlySwitchAfterRow, Collections.synchronizedList(new ArrayList<SourceSwitchingIterator>()), null);
+    this(source, onlySwitchAfterRow, Collections.synchronizedList(new ArrayList<SourceSwitchingIterator>()));
   }
 
   public SourceSwitchingIterator(DataSource source) {
@@ -82,7 +82,7 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
 
   @Override
   public synchronized SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
-    return new SourceSwitchingIterator(source.getDeepCopyDataSource(env), onlySwitchAfterRow, copies, iflag);
+    return new SourceSwitchingIterator(source.getDeepCopyDataSource(env), onlySwitchAfterRow, copies);
   }
 
   @Override
@@ -149,9 +149,6 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     while (!source.isCurrent()) {
       source = source.getNewDataSource();
       iter = source.iterator();
-      if (iflag != null)
-        ((InterruptibleIterator) iter).setInterruptFlag(iflag);
-
       return true;
     }
 
@@ -164,11 +161,8 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     this.inclusive = inclusive;
     this.columnFamilies = columnFamilies;
 
-    if (iter == null) {
+    if (iter == null)
       iter = source.iterator();
-      if (iflag != null)
-        ((InterruptibleIterator) iter).setInterruptFlag(iflag);
-    }
 
     readNext(true);
   }
@@ -196,10 +190,10 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     if (copies.size() != 1)
       throw new IllegalStateException("setInterruptFlag() called after deep copies made " + copies.size());
 
-    this.iflag = flag;
     if (iter != null)
       ((InterruptibleIterator) iter).setInterruptFlag(flag);
 
+    source.setInterruptFlag(flag);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java b/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
index a52b141..23f08a8 100644
--- a/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
@@ -19,6 +19,7 @@ package org.apache.accumulo.core.iterators.system;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import junit.framework.TestCase;
 
@@ -26,6 +27,7 @@ import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IterationInterruptedException;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.SortedMapIterator;
@@ -59,6 +61,7 @@ public class SourceSwitchingIteratorTest extends TestCase {
     DataSource next;
     SortedKeyValueIterator<Key,Value> iter;
     List<TestDataSource> copies = new ArrayList<TestDataSource>();
+    AtomicBoolean iflag;
     
     TestDataSource(SortedKeyValueIterator<Key,Value> iter) {
       this(iter, new ArrayList<TestDataSource>());
@@ -82,6 +85,8 @@ public class SourceSwitchingIteratorTest extends TestCase {
     
     @Override
     public SortedKeyValueIterator<Key,Value> iterator() {
+      if (iflag != null)
+        ((InterruptibleIterator) iter).setInterruptFlag(iflag);
       return iter;
     }
     
@@ -98,7 +103,11 @@ public class SourceSwitchingIteratorTest extends TestCase {
           tds.next = new TestDataSource(next.iter.deepCopy(null), next.copies);
       }
     }
-    
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      this.iflag = flag;
+    }
   }
   
   public void test1() throws Exception {
@@ -236,4 +245,31 @@ public class SourceSwitchingIteratorTest extends TestCase {
     ane(dc1, "r2", "cf1", "cq2", 6, "v4", true);
     assertFalse(dc1.hasTop());
   }
+
+  public void testSetInterrupt() throws Exception {
+
+    TreeMap<Key,Value> tm1 = new TreeMap<Key,Value>();
+    put(tm1, "r1", "cf1", "cq1", 5, "v1");
+
+    SortedMapIterator smi = new SortedMapIterator(tm1);
+    TestDataSource tds = new TestDataSource(smi);
+    SourceSwitchingIterator ssi = new SourceSwitchingIterator(tds, false);
+
+    AtomicBoolean flag = new AtomicBoolean();
+    ssi.setInterruptFlag(flag);
+
+    assertSame(flag, tds.iflag);
+
+    ssi.seek(new Range("r1"), new ArrayList<ByteSequence>(), false);
+    ane(ssi, "r1", "cf1", "cq1", 5, "v1", true);
+    assertFalse(ssi.hasTop());
+
+    flag.set(true);
+
+    try {
+      ssi.seek(new Range("r1"), new ArrayList<ByteSequence>(), false);
+      fail("expected to see IterationInterruptedException");
+    } catch (IterationInterruptedException iie) {}
+
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
index 9613cca..cd5ca9c 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.concurrent.Semaphore;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.Key;
@@ -379,6 +380,7 @@ public class FileManager {
     private boolean current = true;
     private IteratorEnvironment env;
     private String file;
+    private AtomicBoolean iflag;
     
     FileDataSource(String file, SortedKeyValueIterator<Key,Value> iter) {
       this.file = file;
@@ -411,6 +413,8 @@ public class FileManager {
     
     @Override
     public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
+      if (iflag != null)
+        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
       return iter;
     }
     
@@ -426,11 +430,20 @@ public class FileManager {
     void setIterator(SortedKeyValueIterator<Key,Value> iter) {
       current = false;
       this.iter = iter;
+
+      if (iflag != null)
+        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
+
       for (FileDataSource fds : deepCopies) {
         fds.current = false;
         fds.iter = iter.deepCopy(fds.env);
       }
     }
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      this.iflag = flag;
+    }
     
   }
   

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
index 43cf3c1..b696ff4 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
@@ -384,15 +384,17 @@ public class InMemoryMap {
     private FileSKVIterator reader;
     private MemoryDataSource parent;
     private IteratorEnvironment env;
+    private AtomicBoolean iflag;
     
     MemoryDataSource() {
-      this(null, false, null);
+      this(null, false, null, null);
     }
     
-    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env) {
+    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env, AtomicBoolean iflag) {
       this.parent = parent;
       this.switched = switched;
       this.env = env;
+      this.iflag = iflag;
     }
     
     @Override
@@ -428,6 +430,8 @@ public class InMemoryMap {
         FileSystem fs = TraceFileSystem.wrap(FileSystem.getLocal(conf));
         
         reader = new RFileOperations().openReader(memDumpFile, true, fs, conf, ServerConfiguration.getSiteConfiguration());
+        if (iflag != null)
+          reader.setInterruptFlag(iflag);
       }
 
       return reader;
@@ -436,9 +440,11 @@ public class InMemoryMap {
     @Override
     public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
       if (iter == null)
-        if (!switched)
+        if (!switched) {
           iter = map.skvIterator();
-        else {
+          if (iflag != null)
+            iter.setInterruptFlag(iflag);
+        } else {
           if (parent == null)
             iter = new MemKeyConversionIterator(getReader());
           else
@@ -454,7 +460,12 @@ public class InMemoryMap {
     
     @Override
     public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
-      return new MemoryDataSource(parent == null ? this : parent, switched, env);
+      return new MemoryDataSource(parent == null ? this : parent, switched, env, iflag);
+    }
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      this.iflag = flag;
     }
     
   }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
index a1fc707..bb13ff8 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
@@ -2139,6 +2139,11 @@ public class Tablet {
     public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
       throw new UnsupportedOperationException();
     }
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      throw new UnsupportedOperationException();
+    }
     
   }
   

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
----------------------------------------------------------------------
diff --git a/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java b/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
index c905bb8..683adf4 100644
--- a/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
+++ b/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
@@ -23,6 +23,7 @@ import java.util.List;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import junit.framework.TestCase;
 
@@ -32,6 +33,7 @@ import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IterationInterruptedException;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
 import org.apache.accumulo.core.util.LocalityGroupUtil;
@@ -237,7 +239,7 @@ public class InMemoryMapTest extends TestCase {
     ski1.close();
   }
   
-  private void deepCopyAndDelete(int interleaving) throws Exception {
+  private void deepCopyAndDelete(int interleaving, boolean interrupt) throws Exception {
     // interleaving == 0 intentionally omitted, this runs the test w/o deleting in mem map
 
     InMemoryMap imm = new InMemoryMap(false, System.getProperty("user.dir") + "/target");
@@ -247,37 +249,61 @@ public class InMemoryMapTest extends TestCase {
     
     MemoryIterator ski1 = imm.skvIterator();
     
-    if (interleaving == 1)
+    AtomicBoolean iflag = new AtomicBoolean(false);
+    ski1.setInterruptFlag(iflag);
+
+    if (interleaving == 1) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
     
     SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(null);
 
-    if (interleaving == 2)
+    if (interleaving == 2) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
 
     dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
     ski1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 
-    if (interleaving == 3)
+    if (interleaving == 3) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
 
     ae(dc, "r1", "foo:cq1", 3, "bar1");
     ae(ski1, "r1", "foo:cq1", 3, "bar1");
     dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 
-    if (interleaving == 4)
+    if (interleaving == 4) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
 
     ae(ski1, "r1", "foo:cq2", 3, "bar2");
     ae(dc, "r1", "foo:cq1", 3, "bar1");
     ae(dc, "r1", "foo:cq2", 3, "bar2");
     assertFalse(dc.hasTop());
     assertFalse(ski1.hasTop());
+
+    if (interrupt)
+      dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
   }
 
   public void testDeepCopyAndDelete() throws Exception {
     for (int i = 0; i <= 4; i++)
-      deepCopyAndDelete(i);
+      deepCopyAndDelete(i, false);
+
+    for (int i = 1; i <= 4; i++)
+      try {
+        deepCopyAndDelete(i, true);
+        fail("i = " + i);
+      } catch (IterationInterruptedException iie) {}
   }
 
   public void testBug1() throws Exception {


[02/10] git commit: ACCUMULO-1628 Fixes issue after previous changes which interrupted a deep-copy

Posted by el...@apache.org.
ACCUMULO-1628 Fixes issue after previous changes which interrupted a deep-copy

Pushes the interrupt flag from the SourceSwitchingIterator down to the
FileManager and InMemoryMap. This should avoid passing the interrupt
flag into a deep copy, which isn't supported. Adds some more tests
that reproduce the edge case which is now fixed.

Signed-off-by: Josh Elser <el...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/7699e1f4
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/7699e1f4
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/7699e1f4

Branch: refs/heads/1.6.1-SNAPSHOT
Commit: 7699e1f43c4ee51bfa4be1e9e73ea722f934a3d6
Parents: c335fca
Author: Keith Turner <kt...@apache.org>
Authored: Thu Sep 11 16:31:08 2014 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Thu Sep 11 16:31:08 2014 -0700

----------------------------------------------------------------------
 .../system/SourceSwitchingIterator.java         | 20 ++++-------
 .../system/SourceSwitchingIteratorTest.java     | 38 +++++++++++++++++++-
 .../server/tabletserver/FileManager.java        | 13 +++++++
 .../server/tabletserver/InMemoryMap.java        | 21 ++++++++---
 .../accumulo/server/tabletserver/Tablet.java    |  5 +++
 .../server/tabletserver/InMemoryMapTest.java    | 38 ++++++++++++++++----
 6 files changed, 110 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
index 33d0ebf..6c40176 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
@@ -47,6 +47,8 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     DataSource getDeepCopyDataSource(IteratorEnvironment env);
 
     SortedKeyValueIterator<Key,Value> iterator() throws IOException;
+
+    void setInterruptFlag(AtomicBoolean flag);
   }
 
   private DataSource source;
@@ -60,20 +62,18 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
   private Collection<ByteSequence> columnFamilies;
 
   private boolean onlySwitchAfterRow;
-  private AtomicBoolean iflag;
 
   private final List<SourceSwitchingIterator> copies;
 
-  private SourceSwitchingIterator(DataSource source, boolean onlySwitchAfterRow, List<SourceSwitchingIterator> copies, AtomicBoolean iflag) {
+  private SourceSwitchingIterator(DataSource source, boolean onlySwitchAfterRow, List<SourceSwitchingIterator> copies) {
     this.source = source;
     this.onlySwitchAfterRow = onlySwitchAfterRow;
     this.copies = copies;
-    this.iflag = iflag;
     copies.add(this);
   }
 
   public SourceSwitchingIterator(DataSource source, boolean onlySwitchAfterRow) {
-    this(source, onlySwitchAfterRow, Collections.synchronizedList(new ArrayList<SourceSwitchingIterator>()), null);
+    this(source, onlySwitchAfterRow, Collections.synchronizedList(new ArrayList<SourceSwitchingIterator>()));
   }
 
   public SourceSwitchingIterator(DataSource source) {
@@ -82,7 +82,7 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
 
   @Override
   public synchronized SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
-    return new SourceSwitchingIterator(source.getDeepCopyDataSource(env), onlySwitchAfterRow, copies, iflag);
+    return new SourceSwitchingIterator(source.getDeepCopyDataSource(env), onlySwitchAfterRow, copies);
   }
 
   @Override
@@ -149,9 +149,6 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     while (!source.isCurrent()) {
       source = source.getNewDataSource();
       iter = source.iterator();
-      if (iflag != null)
-        ((InterruptibleIterator) iter).setInterruptFlag(iflag);
-
       return true;
     }
 
@@ -164,11 +161,8 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     this.inclusive = inclusive;
     this.columnFamilies = columnFamilies;
 
-    if (iter == null) {
+    if (iter == null)
       iter = source.iterator();
-      if (iflag != null)
-        ((InterruptibleIterator) iter).setInterruptFlag(iflag);
-    }
 
     readNext(true);
   }
@@ -196,10 +190,10 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     if (copies.size() != 1)
       throw new IllegalStateException("setInterruptFlag() called after deep copies made " + copies.size());
 
-    this.iflag = flag;
     if (iter != null)
       ((InterruptibleIterator) iter).setInterruptFlag(flag);
 
+    source.setInterruptFlag(flag);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java b/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
index a52b141..23f08a8 100644
--- a/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
@@ -19,6 +19,7 @@ package org.apache.accumulo.core.iterators.system;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import junit.framework.TestCase;
 
@@ -26,6 +27,7 @@ import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IterationInterruptedException;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.SortedMapIterator;
@@ -59,6 +61,7 @@ public class SourceSwitchingIteratorTest extends TestCase {
     DataSource next;
     SortedKeyValueIterator<Key,Value> iter;
     List<TestDataSource> copies = new ArrayList<TestDataSource>();
+    AtomicBoolean iflag;
     
     TestDataSource(SortedKeyValueIterator<Key,Value> iter) {
       this(iter, new ArrayList<TestDataSource>());
@@ -82,6 +85,8 @@ public class SourceSwitchingIteratorTest extends TestCase {
     
     @Override
     public SortedKeyValueIterator<Key,Value> iterator() {
+      if (iflag != null)
+        ((InterruptibleIterator) iter).setInterruptFlag(iflag);
       return iter;
     }
     
@@ -98,7 +103,11 @@ public class SourceSwitchingIteratorTest extends TestCase {
           tds.next = new TestDataSource(next.iter.deepCopy(null), next.copies);
       }
     }
-    
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      this.iflag = flag;
+    }
   }
   
   public void test1() throws Exception {
@@ -236,4 +245,31 @@ public class SourceSwitchingIteratorTest extends TestCase {
     ane(dc1, "r2", "cf1", "cq2", 6, "v4", true);
     assertFalse(dc1.hasTop());
   }
+
+  public void testSetInterrupt() throws Exception {
+
+    TreeMap<Key,Value> tm1 = new TreeMap<Key,Value>();
+    put(tm1, "r1", "cf1", "cq1", 5, "v1");
+
+    SortedMapIterator smi = new SortedMapIterator(tm1);
+    TestDataSource tds = new TestDataSource(smi);
+    SourceSwitchingIterator ssi = new SourceSwitchingIterator(tds, false);
+
+    AtomicBoolean flag = new AtomicBoolean();
+    ssi.setInterruptFlag(flag);
+
+    assertSame(flag, tds.iflag);
+
+    ssi.seek(new Range("r1"), new ArrayList<ByteSequence>(), false);
+    ane(ssi, "r1", "cf1", "cq1", 5, "v1", true);
+    assertFalse(ssi.hasTop());
+
+    flag.set(true);
+
+    try {
+      ssi.seek(new Range("r1"), new ArrayList<ByteSequence>(), false);
+      fail("expected to see IterationInterruptedException");
+    } catch (IterationInterruptedException iie) {}
+
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
index 9613cca..cd5ca9c 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.concurrent.Semaphore;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.Key;
@@ -379,6 +380,7 @@ public class FileManager {
     private boolean current = true;
     private IteratorEnvironment env;
     private String file;
+    private AtomicBoolean iflag;
     
     FileDataSource(String file, SortedKeyValueIterator<Key,Value> iter) {
       this.file = file;
@@ -411,6 +413,8 @@ public class FileManager {
     
     @Override
     public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
+      if (iflag != null)
+        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
       return iter;
     }
     
@@ -426,11 +430,20 @@ public class FileManager {
     void setIterator(SortedKeyValueIterator<Key,Value> iter) {
       current = false;
       this.iter = iter;
+
+      if (iflag != null)
+        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
+
       for (FileDataSource fds : deepCopies) {
         fds.current = false;
         fds.iter = iter.deepCopy(fds.env);
       }
     }
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      this.iflag = flag;
+    }
     
   }
   

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
index 43cf3c1..b696ff4 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
@@ -384,15 +384,17 @@ public class InMemoryMap {
     private FileSKVIterator reader;
     private MemoryDataSource parent;
     private IteratorEnvironment env;
+    private AtomicBoolean iflag;
     
     MemoryDataSource() {
-      this(null, false, null);
+      this(null, false, null, null);
     }
     
-    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env) {
+    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env, AtomicBoolean iflag) {
       this.parent = parent;
       this.switched = switched;
       this.env = env;
+      this.iflag = iflag;
     }
     
     @Override
@@ -428,6 +430,8 @@ public class InMemoryMap {
         FileSystem fs = TraceFileSystem.wrap(FileSystem.getLocal(conf));
         
         reader = new RFileOperations().openReader(memDumpFile, true, fs, conf, ServerConfiguration.getSiteConfiguration());
+        if (iflag != null)
+          reader.setInterruptFlag(iflag);
       }
 
       return reader;
@@ -436,9 +440,11 @@ public class InMemoryMap {
     @Override
     public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
       if (iter == null)
-        if (!switched)
+        if (!switched) {
           iter = map.skvIterator();
-        else {
+          if (iflag != null)
+            iter.setInterruptFlag(iflag);
+        } else {
           if (parent == null)
             iter = new MemKeyConversionIterator(getReader());
           else
@@ -454,7 +460,12 @@ public class InMemoryMap {
     
     @Override
     public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
-      return new MemoryDataSource(parent == null ? this : parent, switched, env);
+      return new MemoryDataSource(parent == null ? this : parent, switched, env, iflag);
+    }
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      this.iflag = flag;
     }
     
   }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
index a1fc707..bb13ff8 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
@@ -2139,6 +2139,11 @@ public class Tablet {
     public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
       throw new UnsupportedOperationException();
     }
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      throw new UnsupportedOperationException();
+    }
     
   }
   

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
----------------------------------------------------------------------
diff --git a/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java b/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
index c905bb8..683adf4 100644
--- a/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
+++ b/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
@@ -23,6 +23,7 @@ import java.util.List;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import junit.framework.TestCase;
 
@@ -32,6 +33,7 @@ import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IterationInterruptedException;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
 import org.apache.accumulo.core.util.LocalityGroupUtil;
@@ -237,7 +239,7 @@ public class InMemoryMapTest extends TestCase {
     ski1.close();
   }
   
-  private void deepCopyAndDelete(int interleaving) throws Exception {
+  private void deepCopyAndDelete(int interleaving, boolean interrupt) throws Exception {
     // interleaving == 0 intentionally omitted, this runs the test w/o deleting in mem map
 
     InMemoryMap imm = new InMemoryMap(false, System.getProperty("user.dir") + "/target");
@@ -247,37 +249,61 @@ public class InMemoryMapTest extends TestCase {
     
     MemoryIterator ski1 = imm.skvIterator();
     
-    if (interleaving == 1)
+    AtomicBoolean iflag = new AtomicBoolean(false);
+    ski1.setInterruptFlag(iflag);
+
+    if (interleaving == 1) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
     
     SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(null);
 
-    if (interleaving == 2)
+    if (interleaving == 2) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
 
     dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
     ski1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 
-    if (interleaving == 3)
+    if (interleaving == 3) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
 
     ae(dc, "r1", "foo:cq1", 3, "bar1");
     ae(ski1, "r1", "foo:cq1", 3, "bar1");
     dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 
-    if (interleaving == 4)
+    if (interleaving == 4) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
 
     ae(ski1, "r1", "foo:cq2", 3, "bar2");
     ae(dc, "r1", "foo:cq1", 3, "bar1");
     ae(dc, "r1", "foo:cq2", 3, "bar2");
     assertFalse(dc.hasTop());
     assertFalse(ski1.hasTop());
+
+    if (interrupt)
+      dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
   }
 
   public void testDeepCopyAndDelete() throws Exception {
     for (int i = 0; i <= 4; i++)
-      deepCopyAndDelete(i);
+      deepCopyAndDelete(i, false);
+
+    for (int i = 1; i <= 4; i++)
+      try {
+        deepCopyAndDelete(i, true);
+        fail("i = " + i);
+      } catch (IterationInterruptedException iie) {}
   }
 
   public void testBug1() throws Exception {


[07/10] Merge branch '1.5.2-SNAPSHOT' into 1.6.1-SNAPSHOT

Posted by el...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/d54e0fd8/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
----------------------------------------------------------------------
diff --cc server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
index 3932552,0000000..39c4c39
mode 100644,000000..100644
--- a/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
+++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
@@@ -1,557 -1,0 +1,584 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.tserver;
 +
 +import static org.junit.Assert.assertEquals;
 +import static org.junit.Assert.assertFalse;
 +import static org.junit.Assert.assertTrue;
++import static org.junit.Assert.fail;
 +
 +import java.io.File;
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.Arrays;
 +import java.util.Collections;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Set;
 +import java.util.concurrent.ExecutorService;
 +import java.util.concurrent.Executors;
 +import java.util.concurrent.TimeUnit;
++import java.util.concurrent.atomic.AtomicBoolean;
 +
 +import org.apache.accumulo.core.data.ArrayByteSequence;
 +import org.apache.accumulo.core.data.ByteSequence;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.Mutation;
 +import org.apache.accumulo.core.data.Range;
 +import org.apache.accumulo.core.data.Value;
++import org.apache.accumulo.core.iterators.IterationInterruptedException;
 +import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 +import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
 +import org.apache.accumulo.core.util.LocalityGroupUtil;
 +import org.apache.accumulo.server.client.HdfsZooInstance;
 +import org.apache.accumulo.server.conf.ZooConfiguration;
 +import org.apache.accumulo.tserver.InMemoryMap.MemoryIterator;
 +import org.apache.hadoop.io.Text;
 +import org.apache.log4j.Level;
 +import org.apache.log4j.Logger;
 +import org.junit.BeforeClass;
 +import org.junit.Ignore;
 +import org.junit.Rule;
 +import org.junit.Test;
 +import org.junit.rules.TemporaryFolder;
 +
 +public class InMemoryMapTest {
 +
 +  @BeforeClass
 +  public static void setUp() throws Exception {
 +    // suppress log messages having to do with not having an instance
 +    Logger.getLogger(ZooConfiguration.class).setLevel(Level.OFF);
 +    Logger.getLogger(HdfsZooInstance.class).setLevel(Level.OFF);
 +  }
 +
 +  @Rule
 +  public TemporaryFolder tempFolder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
 +
 +  public void mutate(InMemoryMap imm, String row, String column, long ts) {
 +    Mutation m = new Mutation(new Text(row));
 +    String[] sa = column.split(":");
 +    m.putDelete(new Text(sa[0]), new Text(sa[1]), ts);
 +
 +    imm.mutate(Collections.singletonList(m));
 +  }
 +
 +  public void mutate(InMemoryMap imm, String row, String column, long ts, String value) {
 +    Mutation m = new Mutation(new Text(row));
 +    String[] sa = column.split(":");
 +    m.put(new Text(sa[0]), new Text(sa[1]), ts, new Value(value.getBytes()));
 +
 +    imm.mutate(Collections.singletonList(m));
 +  }
 +
 +  static Key nk(String row, String column, long ts) {
 +    String[] sa = column.split(":");
 +    Key k = new Key(new Text(row), new Text(sa[0]), new Text(sa[1]), ts);
 +    return k;
 +  }
 +
 +  static void ae(SortedKeyValueIterator<Key,Value> dc, String row, String column, int ts, String val) throws IOException {
 +    assertTrue(dc.hasTop());
 +    assertEquals(nk(row, column, ts), dc.getTopKey());
 +    assertEquals(new Value(val.getBytes()), dc.getTopValue());
 +    dc.next();
 +
 +  }
 +
 +  static Set<ByteSequence> newCFSet(String... cfs) {
 +    HashSet<ByteSequence> cfSet = new HashSet<ByteSequence>();
 +    for (String cf : cfs) {
 +      cfSet.add(new ArrayByteSequence(cf));
 +    }
 +    return cfSet;
 +  }
 +
 +  @Test
 +  public void test2() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    MemoryIterator ski1 = imm.skvIterator();
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    MemoryIterator ski2 = imm.skvIterator();
 +
 +    ski1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    assertFalse(ski1.hasTop());
 +
 +    ski2.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    assertTrue(ski2.hasTop());
 +    ae(ski2, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski2.hasTop());
 +
 +  }
 +
 +  @Test
 +  public void test3() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq1", 3, "bar2");
 +    MemoryIterator ski1 = imm.skvIterator();
 +    mutate(imm, "r1", "foo:cq1", 3, "bar3");
 +
 +    mutate(imm, "r3", "foo:cq1", 3, "bar9");
 +    mutate(imm, "r3", "foo:cq1", 3, "bara");
 +
 +    MemoryIterator ski2 = imm.skvIterator();
 +
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski2.seek(new Range(new Text("r3")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski2, "r3", "foo:cq1", 3, "bara");
 +    ae(ski2, "r3", "foo:cq1", 3, "bar9");
 +    assertFalse(ski1.hasTop());
 +
 +  }
 +
 +  @Test
 +  public void test4() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq1", 3, "bar2");
 +    MemoryIterator ski1 = imm.skvIterator();
 +    mutate(imm, "r1", "foo:cq1", 3, "bar3");
 +
 +    imm.delete(0);
 +
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.seek(new Range(new Text("r2")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.seek(new Range(nk("r1", "foo:cq1", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.close();
 +  }
 +
 +  @Test
 +  public void test5() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq1", 3, "bar2");
 +    mutate(imm, "r1", "foo:cq1", 3, "bar3");
 +
 +    MemoryIterator ski1 = imm.skvIterator();
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar3");
 +
 +    imm.delete(0);
 +
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.close();
 +
 +    imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq2", 3, "bar2");
 +    mutate(imm, "r1", "foo:cq3", 3, "bar3");
 +
 +    ski1 = imm.skvIterator();
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +
 +    imm.delete(0);
 +
 +    ae(ski1, "r1", "foo:cq2", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq3", 3, "bar3");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.close();
 +  }
 +
 +  @Test
 +  public void test6() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq2", 3, "bar2");
 +    mutate(imm, "r1", "foo:cq3", 3, "bar3");
 +    mutate(imm, "r1", "foo:cq4", 3, "bar4");
 +
 +    MemoryIterator ski1 = imm.skvIterator();
 +
 +    mutate(imm, "r1", "foo:cq5", 3, "bar5");
 +
 +    SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(null);
 +
 +    ski1.seek(new Range(nk("r1", "foo:cq1", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +
 +    dc.seek(new Range(nk("r1", "foo:cq2", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(dc, "r1", "foo:cq2", 3, "bar2");
 +
 +    imm.delete(0);
 +
 +    ae(ski1, "r1", "foo:cq2", 3, "bar2");
 +    ae(dc, "r1", "foo:cq3", 3, "bar3");
 +    ae(ski1, "r1", "foo:cq3", 3, "bar3");
 +    ae(dc, "r1", "foo:cq4", 3, "bar4");
 +    ae(ski1, "r1", "foo:cq4", 3, "bar4");
 +    assertFalse(ski1.hasTop());
 +    assertFalse(dc.hasTop());
 +
 +    ski1.seek(new Range(nk("r1", "foo:cq3", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +
 +    dc.seek(new Range(nk("r1", "foo:cq4", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(dc, "r1", "foo:cq4", 3, "bar4");
 +    assertFalse(dc.hasTop());
 +
 +    ae(ski1, "r1", "foo:cq3", 3, "bar3");
 +    ae(ski1, "r1", "foo:cq4", 3, "bar4");
 +    assertFalse(ski1.hasTop());
 +    assertFalse(dc.hasTop());
 +
 +    ski1.close();
 +  }
- 
-   private void deepCopyAndDelete(int interleaving) throws Exception {
++  
++  private void deepCopyAndDelete(int interleaving, boolean interrupt) throws Exception {
 +    // interleaving == 0 intentionally omitted, this runs the test w/o deleting in mem map
 +
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +    
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq2", 3, "bar2");
 +    
 +    MemoryIterator ski1 = imm.skvIterator();
 +    
-     if (interleaving == 1)
++    AtomicBoolean iflag = new AtomicBoolean(false);
++    ski1.setInterruptFlag(iflag);
++
++    if (interleaving == 1) {
 +      imm.delete(0);
++      if (interrupt)
++        iflag.set(true);
++    }
 +    
 +    SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(null);
 +
-     if (interleaving == 2)
++    if (interleaving == 2) {
 +      imm.delete(0);
++      if (interrupt)
++        iflag.set(true);
++    }
 +
 +    dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ski1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +
-     if (interleaving == 3)
++    if (interleaving == 3) {
 +      imm.delete(0);
++      if (interrupt)
++        iflag.set(true);
++    }
 +
 +    ae(dc, "r1", "foo:cq1", 3, "bar1");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +
-     if (interleaving == 4)
++    if (interleaving == 4) {
 +      imm.delete(0);
++      if (interrupt)
++        iflag.set(true);
++    }
 +
 +    ae(ski1, "r1", "foo:cq2", 3, "bar2");
 +    ae(dc, "r1", "foo:cq1", 3, "bar1");
 +    ae(dc, "r1", "foo:cq2", 3, "bar2");
 +    assertFalse(dc.hasTop());
 +    assertFalse(ski1.hasTop());
++
++    if (interrupt)
++      dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +  }
 +
 +  @Test
 +  public void testDeepCopyAndDelete() throws Exception {
 +    for (int i = 0; i <= 4; i++)
-       deepCopyAndDelete(i);
++      deepCopyAndDelete(i, false);
++
++    for (int i = 1; i <= 4; i++)
++      try {
++        deepCopyAndDelete(i, true);
++        fail("i = " + i);
++      } catch (IterationInterruptedException iie) {}
 +  }
 +   
 +  @Test
 +  public void testBug1() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    for (int i = 0; i < 20; i++) {
 +      mutate(imm, "r1", "foo:cq" + i, 3, "bar" + i);
 +    }
 +
 +    for (int i = 0; i < 20; i++) {
 +      mutate(imm, "r2", "foo:cq" + i, 3, "bar" + i);
 +    }
 +
 +    MemoryIterator ski1 = imm.skvIterator();
 +    ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(ski1);
 +
 +    imm.delete(0);
 +
 +    ArrayList<ByteSequence> columns = new ArrayList<ByteSequence>();
 +    columns.add(new ArrayByteSequence("bar"));
 +
 +    // this seek resulted in an infinite loop before a bug was fixed
 +    cfsi.seek(new Range("r1"), columns, true);
 +
 +    assertFalse(cfsi.hasTop());
 +
 +    ski1.close();
 +  }
 +
 +  @Test
 +  public void testSeekBackWards() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq2", 3, "bar2");
 +    mutate(imm, "r1", "foo:cq3", 3, "bar3");
 +    mutate(imm, "r1", "foo:cq4", 3, "bar4");
 +
 +    MemoryIterator skvi1 = imm.skvIterator();
 +
 +    skvi1.seek(new Range(nk("r1", "foo:cq3", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(skvi1, "r1", "foo:cq3", 3, "bar3");
 +
 +    skvi1.seek(new Range(nk("r1", "foo:cq1", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(skvi1, "r1", "foo:cq1", 3, "bar1");
 +
 +  }
 +
 +  @Test
 +  public void testDuplicateKey() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    Mutation m = new Mutation(new Text("r1"));
 +    m.put(new Text("foo"), new Text("cq"), 3, new Value("v1".getBytes()));
 +    m.put(new Text("foo"), new Text("cq"), 3, new Value("v2".getBytes()));
 +    imm.mutate(Collections.singletonList(m));
 +
 +    MemoryIterator skvi1 = imm.skvIterator();
 +    skvi1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(skvi1, "r1", "foo:cq", 3, "v2");
 +    ae(skvi1, "r1", "foo:cq", 3, "v1");
 +  }
 +
 +  private static final Logger log = Logger.getLogger(InMemoryMapTest.class);
 +
 +  static long sum(long[] counts) {
 +    long result = 0;
 +    for (int i = 0; i < counts.length; i++)
 +      result += counts[i];
 +    return result;
 +  }
 +
 +  // - hard to get this timing test to run well on apache build machines
 +  @Test
 +  @Ignore
 +  public void parallelWriteSpeed() throws InterruptedException, IOException {
 +    List<Double> timings = new ArrayList<Double>();
 +    for (int threads : new int[] {1, 2, 16, /* 64, 256 */}) {
 +      final long now = System.currentTimeMillis();
 +      final long counts[] = new long[threads];
 +      final InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +      ExecutorService e = Executors.newFixedThreadPool(threads);
 +      for (int j = 0; j < threads; j++) {
 +        final int threadId = j;
 +        e.execute(new Runnable() {
 +          @Override
 +          public void run() {
 +            while (System.currentTimeMillis() - now < 1000) {
 +              for (int k = 0; k < 1000; k++) {
 +                Mutation m = new Mutation("row");
 +                m.put("cf", "cq", new Value("v".getBytes()));
 +                List<Mutation> mutations = Collections.singletonList(m);
 +                imm.mutate(mutations);
 +                counts[threadId]++;
 +              }
 +            }
 +          }
 +        });
 +      }
 +      e.shutdown();
 +      e.awaitTermination(10, TimeUnit.SECONDS);
 +      imm.delete(10000);
 +      double mutationsPerSecond = sum(counts) / ((System.currentTimeMillis() - now) / 1000.);
 +      timings.add(mutationsPerSecond);
 +      log.info(String.format("%.1f mutations per second with %d threads", mutationsPerSecond, threads));
 +    }
 +    // verify that more threads doesn't go a lot faster, or a lot slower than one thread
 +    for (int i = 0; i < timings.size(); i++) {
 +      double ratioFirst = timings.get(0) / timings.get(i);
 +      assertTrue(ratioFirst < 3);
 +      assertTrue(ratioFirst > 0.3);
 +    }
 +  }
 +
 +  @Test
 +  public void testLocalityGroups() throws Exception {
 +
 +    Map<String,Set<ByteSequence>> lggroups1 = new HashMap<String,Set<ByteSequence>>();
 +    lggroups1.put("lg1", newCFSet("cf1", "cf2"));
 +    lggroups1.put("lg2", newCFSet("cf3", "cf4"));
 +
 +    InMemoryMap imm = new InMemoryMap(lggroups1, false, tempFolder.newFolder().getAbsolutePath());
 +
 +    Mutation m1 = new Mutation("r1");
 +    m1.put("cf1", "x", 2, "1");
 +    m1.put("cf1", "y", 2, "2");
 +    m1.put("cf3", "z", 2, "3");
 +    m1.put("foo", "b", 2, "9");
 +
 +    Mutation m2 = new Mutation("r2");
 +    m2.put("cf2", "x", 3, "5");
 +
 +    Mutation m3 = new Mutation("r3");
 +    m3.put("foo", "b", 4, "6");
 +
 +    Mutation m4 = new Mutation("r4");
 +    m4.put("foo", "b", 5, "7");
 +    m4.put("cf4", "z", 5, "8");
 +
 +    Mutation m5 = new Mutation("r5");
 +    m5.put("cf3", "z", 6, "A");
 +    m5.put("cf4", "z", 6, "B");
 +
 +    imm.mutate(Arrays.asList(m1, m2, m3, m4, m5));
 +
 +    MemoryIterator iter1 = imm.skvIterator();
 +
 +    seekLocalityGroups(iter1);
 +    SortedKeyValueIterator<Key,Value> dc1 = iter1.deepCopy(null);
 +    seekLocalityGroups(dc1);
 +
 +    assertTrue(imm.getNumEntries() == 10);
 +    assertTrue(imm.estimatedSizeInBytes() > 0);
 +
 +    imm.delete(0);
 +
 +    seekLocalityGroups(iter1);
 +    seekLocalityGroups(dc1);
 +    // TODO uncomment following when ACCUMULO-1628 is fixed
 +    // seekLocalityGroups(iter1.deepCopy(null));
 +  }
 +
 +  private void seekLocalityGroups(SortedKeyValueIterator<Key,Value> iter1) throws IOException {
 +    iter1.seek(new Range(), newCFSet("cf1"), true);
 +    ae(iter1, "r1", "cf1:x", 2, "1");
 +    ae(iter1, "r1", "cf1:y", 2, "2");
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range("r2", "r4"), newCFSet("cf1"), true);
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), newCFSet("cf3"), true);
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    ae(iter1, "r5", "cf3:z", 6, "A");
 +    ae(iter1, "r5", "cf4:z", 6, "B");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), newCFSet("foo"), true);
 +    ae(iter1, "r1", "foo:b", 2, "9");
 +    ae(iter1, "r3", "foo:b", 4, "6");
 +    ae(iter1, "r4", "foo:b", 5, "7");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), newCFSet("cf1", "cf3"), true);
 +    ae(iter1, "r1", "cf1:x", 2, "1");
 +    ae(iter1, "r1", "cf1:y", 2, "2");
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    ae(iter1, "r5", "cf3:z", 6, "A");
 +    ae(iter1, "r5", "cf4:z", 6, "B");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range("r2", "r4"), newCFSet("cf1", "cf3"), true);
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), newCFSet("cf1", "cf3", "foo"), true);
 +    assertAll(iter1);
 +
 +    iter1.seek(new Range("r1", "r2"), newCFSet("cf1", "cf3", "foo"), true);
 +    ae(iter1, "r1", "cf1:x", 2, "1");
 +    ae(iter1, "r1", "cf1:y", 2, "2");
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r1", "foo:b", 2, "9");
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    assertAll(iter1);
 +
 +    iter1.seek(new Range(), newCFSet("cf1"), false);
 +    assertAll(iter1);
 +
 +    iter1.seek(new Range(), newCFSet("cf1", "cf2"), false);
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r1", "foo:b", 2, "9");
 +    ae(iter1, "r3", "foo:b", 4, "6");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    ae(iter1, "r4", "foo:b", 5, "7");
 +    ae(iter1, "r5", "cf3:z", 6, "A");
 +    ae(iter1, "r5", "cf4:z", 6, "B");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range("r2"), newCFSet("cf1", "cf3", "foo"), true);
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    assertFalse(iter1.hasTop());
 +  }
 +
 +  private void assertAll(SortedKeyValueIterator<Key,Value> iter1) throws IOException {
 +    ae(iter1, "r1", "cf1:x", 2, "1");
 +    ae(iter1, "r1", "cf1:y", 2, "2");
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r1", "foo:b", 2, "9");
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    ae(iter1, "r3", "foo:b", 4, "6");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    ae(iter1, "r4", "foo:b", 5, "7");
 +    ae(iter1, "r5", "cf3:z", 6, "A");
 +    ae(iter1, "r5", "cf4:z", 6, "B");
 +    assertFalse(iter1.hasTop());
 +  }
 +}


[09/10] git commit: Merge branch '1.5.2-SNAPSHOT' into 1.6.1-SNAPSHOT

Posted by el...@apache.org.
Merge branch '1.5.2-SNAPSHOT' into 1.6.1-SNAPSHOT

Conflicts:
	server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
	server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/d54e0fd8
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/d54e0fd8
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/d54e0fd8

Branch: refs/heads/1.6.1-SNAPSHOT
Commit: d54e0fd8636405b39a982a6fad5a3fca1593d6cf
Parents: 0d76cd5 7699e1f
Author: Josh Elser <el...@apache.org>
Authored: Thu Sep 11 17:42:01 2014 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Thu Sep 11 17:42:01 2014 -0700

----------------------------------------------------------------------
 .../system/SourceSwitchingIterator.java         | 20 ++++------
 .../system/SourceSwitchingIteratorTest.java     | 38 +++++++++++++++++-
 .../apache/accumulo/tserver/FileManager.java    | 13 +++++++
 .../apache/accumulo/tserver/InMemoryMap.java    | 21 +++++++---
 .../org/apache/accumulo/tserver/Tablet.java     |  5 +++
 .../accumulo/tserver/InMemoryMapTest.java       | 41 ++++++++++++++++----
 6 files changed, 112 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/d54e0fd8/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
index 8bf2517,0000000..b82b9cc
mode 100644,000000..100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
@@@ -1,562 -1,0 +1,575 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.tserver;
 +
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.Collection;
 +import java.util.Collections;
 +import java.util.HashMap;
 +import java.util.Iterator;
 +import java.util.LinkedList;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Map.Entry;
 +import java.util.concurrent.Semaphore;
++import java.util.concurrent.atomic.AtomicBoolean;
 +
 +import org.apache.accumulo.core.conf.Property;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.KeyExtent;
 +import org.apache.accumulo.core.data.Value;
 +import org.apache.accumulo.core.file.FileOperations;
 +import org.apache.accumulo.core.file.FileSKVIterator;
 +import org.apache.accumulo.core.file.blockfile.cache.BlockCache;
 +import org.apache.accumulo.core.iterators.IteratorEnvironment;
 +import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 +import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
 +import org.apache.accumulo.core.iterators.system.TimeSettingIterator;
 +import org.apache.accumulo.core.metadata.schema.DataFileValue;
 +import org.apache.accumulo.server.conf.ServerConfiguration;
 +import org.apache.accumulo.server.fs.FileRef;
 +import org.apache.accumulo.server.fs.VolumeManager;
 +import org.apache.accumulo.server.problems.ProblemReport;
 +import org.apache.accumulo.server.problems.ProblemReportingIterator;
 +import org.apache.accumulo.server.problems.ProblemReports;
 +import org.apache.accumulo.server.problems.ProblemType;
 +import org.apache.accumulo.server.util.time.SimpleTimer;
 +import org.apache.hadoop.fs.FileSystem;
 +import org.apache.hadoop.fs.Path;
 +import org.apache.hadoop.io.Text;
 +import org.apache.log4j.Logger;
 +
 +public class FileManager {
 +  
 +  private static final Logger log = Logger.getLogger(FileManager.class);
 +  
 +  int maxOpen;
 +  
 +  private static class OpenReader implements Comparable<OpenReader> {
 +    long releaseTime;
 +    FileSKVIterator reader;
 +    String fileName;
 +    
 +    public OpenReader(String fileName, FileSKVIterator reader) {
 +      this.fileName = fileName;
 +      this.reader = reader;
 +      this.releaseTime = System.currentTimeMillis();
 +    }
 +    
 +    @Override
 +    public int compareTo(OpenReader o) {
 +      if (releaseTime < o.releaseTime) {
 +        return -1;
 +      } else if (releaseTime > o.releaseTime) {
 +        return 1;
 +      } else {
 +        return 0;
 +      }
 +    }
 +    
 +    @Override
 +    public boolean equals(Object obj) {
 +      if (obj instanceof OpenReader) {
 +        return compareTo((OpenReader) obj) == 0;
 +      }
 +      return false;
 +    }
 +    
 +    @Override
 +    public int hashCode() {
 +      return fileName.hashCode();
 +    }
 +  }
 +  
 +  private Map<String,List<OpenReader>> openFiles;
 +  private HashMap<FileSKVIterator,String> reservedReaders;
 +  
 +  private Semaphore filePermits;
 +  
 +  private VolumeManager fs;
 +  
 +  // the data cache and index cache are allocated in
 +  // TabletResourceManager and passed through the file opener to
 +  // CachableBlockFile which can handle the caches being
 +  // null if unallocated
 +  private BlockCache dataCache = null;
 +  private BlockCache indexCache = null;
 +  
 +  private long maxIdleTime;
 +  
 +  private final ServerConfiguration conf;
 +  
 +  private class IdleFileCloser implements Runnable {
 +    
 +    @Override
 +    public void run() {
 +      
 +      long curTime = System.currentTimeMillis();
 +      
 +      ArrayList<FileSKVIterator> filesToClose = new ArrayList<FileSKVIterator>();
 +      
 +      // determine which files to close in a sync block, and then close the
 +      // files outside of the sync block
 +      synchronized (FileManager.this) {
 +        Iterator<Entry<String,List<OpenReader>>> iter = openFiles.entrySet().iterator();
 +        while (iter.hasNext()) {
 +          Entry<String,List<OpenReader>> entry = iter.next();
 +          List<OpenReader> ofl = entry.getValue();
 +          
 +          for (Iterator<OpenReader> oflIter = ofl.iterator(); oflIter.hasNext();) {
 +            OpenReader openReader = oflIter.next();
 +            
 +            if (curTime - openReader.releaseTime > maxIdleTime) {
 +              
 +              filesToClose.add(openReader.reader);
 +              oflIter.remove();
 +            }
 +          }
 +          
 +          if (ofl.size() == 0) {
 +            iter.remove();
 +          }
 +        }
 +      }
 +      
 +      closeReaders(filesToClose);
 +      
 +    }
 +    
 +  }
 +  
 +  /**
 +   * 
 +   * @param dataCache
 +   *          : underlying file can and should be able to handle a null cache
 +   * @param indexCache
 +   *          : underlying file can and should be able to handle a null cache
 +   */
 +  FileManager(ServerConfiguration conf, VolumeManager fs, int maxOpen, BlockCache dataCache, BlockCache indexCache) {
 +    
 +    if (maxOpen <= 0)
 +      throw new IllegalArgumentException("maxOpen <= 0");
 +    this.conf = conf;
 +    this.dataCache = dataCache;
 +    this.indexCache = indexCache;
 +    
 +    this.filePermits = new Semaphore(maxOpen, true);
 +    this.maxOpen = maxOpen;
 +    this.fs = fs;
 +    
 +    this.openFiles = new HashMap<String,List<OpenReader>>();
 +    this.reservedReaders = new HashMap<FileSKVIterator,String>();
 +    
 +    this.maxIdleTime = conf.getConfiguration().getTimeInMillis(Property.TSERV_MAX_IDLE);
 +    SimpleTimer.getInstance().schedule(new IdleFileCloser(), maxIdleTime, maxIdleTime / 2);
 +    
 +  }
 +  
 +  private static int countReaders(Map<String,List<OpenReader>> files) {
 +    int count = 0;
 +    
 +    for (List<OpenReader> list : files.values()) {
 +      count += list.size();
 +    }
 +    
 +    return count;
 +  }
 +  
 +  private List<FileSKVIterator> takeLRUOpenFiles(int numToTake) {
 +    
 +    ArrayList<OpenReader> openReaders = new ArrayList<OpenReader>();
 +    
 +    for (Entry<String,List<OpenReader>> entry : openFiles.entrySet()) {
 +      openReaders.addAll(entry.getValue());
 +    }
 +    
 +    Collections.sort(openReaders);
 +    
 +    ArrayList<FileSKVIterator> ret = new ArrayList<FileSKVIterator>();
 +    
 +    for (int i = 0; i < numToTake; i++) {
 +      OpenReader or = openReaders.get(i);
 +      
 +      List<OpenReader> ofl = openFiles.get(or.fileName);
 +      if (!ofl.remove(or)) {
 +        throw new RuntimeException("Failed to remove open reader that should have been there");
 +      }
 +      
 +      if (ofl.size() == 0) {
 +        openFiles.remove(or.fileName);
 +      }
 +      
 +      ret.add(or.reader);
 +    }
 +    
 +    return ret;
 +  }
 +  
 +  private static <T> List<T> getFileList(String file, Map<String,List<T>> files) {
 +    List<T> ofl = files.get(file);
 +    if (ofl == null) {
 +      ofl = new ArrayList<T>();
 +      files.put(file, ofl);
 +    }
 +    
 +    return ofl;
 +  }
 +  
 +  private void closeReaders(List<FileSKVIterator> filesToClose) {
 +    for (FileSKVIterator reader : filesToClose) {
 +      try {
 +        reader.close();
 +      } catch (Exception e) {
 +        log.error("Failed to close file " + e.getMessage(), e);
 +      }
 +    }
 +  }
 +  
 +  private List<String> takeOpenFiles(Collection<String> files, List<FileSKVIterator> reservedFiles, Map<FileSKVIterator,String> readersReserved) {
 +    List<String> filesToOpen = new LinkedList<String>(files);
 +    for (Iterator<String> iterator = filesToOpen.iterator(); iterator.hasNext();) {
 +      String file = iterator.next();
 +      
 +      List<OpenReader> ofl = openFiles.get(file);
 +      if (ofl != null && ofl.size() > 0) {
 +        OpenReader openReader = ofl.remove(ofl.size() - 1);
 +        reservedFiles.add(openReader.reader);
 +        readersReserved.put(openReader.reader, file);
 +        if (ofl.size() == 0) {
 +          openFiles.remove(file);
 +        }
 +        iterator.remove();
 +      }
 +      
 +    }
 +    return filesToOpen;
 +  }
 +  
 +  private synchronized String getReservedReadeFilename(FileSKVIterator reader) {
 +    return reservedReaders.get(reader);
 +  }
 +  
 +  private List<FileSKVIterator> reserveReaders(Text table, Collection<String> files, boolean continueOnFailure) throws IOException {
 +    
 +    if (files.size() >= maxOpen) {
 +      throw new IllegalArgumentException("requested files exceeds max open");
 +    }
 +    
 +    if (files.size() == 0) {
 +      return Collections.emptyList();
 +    }
 +    
 +    List<String> filesToOpen = null;
 +    List<FileSKVIterator> filesToClose = Collections.emptyList();
 +    List<FileSKVIterator> reservedFiles = new ArrayList<FileSKVIterator>();
 +    Map<FileSKVIterator,String> readersReserved = new HashMap<FileSKVIterator,String>();
 +    
 +    filePermits.acquireUninterruptibly(files.size());
 +    
 +    // now that the we are past the semaphore, we have the authority
 +    // to open files.size() files
 +    
 +    // determine what work needs to be done in sync block
 +    // but do the work of opening and closing files outside
 +    // a synch block
 +    synchronized (this) {
 +      
 +      filesToOpen = takeOpenFiles(files, reservedFiles, readersReserved);
 +      
 +      int numOpen = countReaders(openFiles);
 +      
 +      if (filesToOpen.size() + numOpen + reservedReaders.size() > maxOpen) {
 +        filesToClose = takeLRUOpenFiles((filesToOpen.size() + numOpen + reservedReaders.size()) - maxOpen);
 +      }
 +    }
 +    
 +    // close files before opening files to ensure we stay under resource
 +    // limitations
 +    closeReaders(filesToClose);
 +    
 +    // open any files that need to be opened
 +    for (String file : filesToOpen) {
 +      try {
 +        if (!file.contains(":"))
 +          throw new IllegalArgumentException("Expected uri, got : " + file);
 +        Path path = new Path(file);
 +        FileSystem ns = fs.getVolumeByPath(path).getFileSystem();
 +        //log.debug("Opening "+file + " path " + path);
 +        FileSKVIterator reader = FileOperations.getInstance().openReader(path.toString(), false, ns, ns.getConf(), conf.getTableConfiguration(table.toString()),
 +            dataCache, indexCache);
 +        reservedFiles.add(reader);
 +        readersReserved.put(reader, file);
 +      } catch (Exception e) {
 +        
 +        ProblemReports.getInstance().report(new ProblemReport(table.toString(), ProblemType.FILE_READ, file, e));
 +        
 +        if (continueOnFailure) {
 +          // release the permit for the file that failed to open
 +          filePermits.release(1);
 +          log.warn("Failed to open file " + file + " " + e.getMessage() + " continuing...");
 +        } else {
 +          // close whatever files were opened
 +          closeReaders(reservedFiles);
 +          
 +          filePermits.release(files.size());
 +          
 +          log.error("Failed to open file " + file + " " + e.getMessage());
 +          throw new IOException("Failed to open " + file, e);
 +        }
 +      }
 +    }
 +    
 +    synchronized (this) {
 +      // update set of reserved readers
 +      reservedReaders.putAll(readersReserved);
 +    }
 +    
 +    return reservedFiles;
 +  }
 +  
 +  private void releaseReaders(List<FileSKVIterator> readers, boolean sawIOException) {
 +    // put files in openFiles
 +    
 +    synchronized (this) {
 +      
 +      // check that readers were actually reserved ... want to make sure a thread does
 +      // not try to release readers they never reserved
 +      if (!reservedReaders.keySet().containsAll(readers)) {
 +        throw new IllegalArgumentException("Asked to release readers that were never reserved ");
 +      }
 +      
 +      for (FileSKVIterator reader : readers) {
 +        try {
 +          reader.closeDeepCopies();
 +        } catch (IOException e) {
 +          log.warn(e, e);
 +          sawIOException = true;
 +        }
 +      }
 +      
 +      for (FileSKVIterator reader : readers) {
 +        String fileName = reservedReaders.remove(reader);
 +        if (!sawIOException)
 +          getFileList(fileName, openFiles).add(new OpenReader(fileName, reader));
 +      }
 +    }
 +    
 +    if (sawIOException)
 +      closeReaders(readers);
 +    
 +    // decrement the semaphore
 +    filePermits.release(readers.size());
 +    
 +  }
 +  
 +  static class FileDataSource implements DataSource {
 +    
 +    private SortedKeyValueIterator<Key,Value> iter;
 +    private ArrayList<FileDataSource> deepCopies;
 +    private boolean current = true;
 +    private IteratorEnvironment env;
 +    private String file;
++    private AtomicBoolean iflag;
 +    
 +    FileDataSource(String file, SortedKeyValueIterator<Key,Value> iter) {
 +      this.file = file;
 +      this.iter = iter;
 +      this.deepCopies = new ArrayList<FileManager.FileDataSource>();
 +    }
 +    
 +    public FileDataSource(IteratorEnvironment env, SortedKeyValueIterator<Key,Value> deepCopy, ArrayList<FileDataSource> deepCopies) {
 +      this.iter = deepCopy;
 +      this.env = env;
 +      this.deepCopies = deepCopies;
 +      deepCopies.add(this);
 +    }
 +    
 +    @Override
 +    public boolean isCurrent() {
 +      return current;
 +    }
 +    
 +    @Override
 +    public DataSource getNewDataSource() {
 +      current = true;
 +      return this;
 +    }
 +    
 +    @Override
 +    public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
 +      return new FileDataSource(env, iter.deepCopy(env), deepCopies);
 +    }
 +    
 +    @Override
 +    public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
++      if (iflag != null)
++        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
 +      return iter;
 +    }
 +    
 +    void unsetIterator() {
 +      current = false;
 +      iter = null;
 +      for (FileDataSource fds : deepCopies) {
 +        fds.current = false;
 +        fds.iter = null;
 +      }
 +    }
 +    
 +    void setIterator(SortedKeyValueIterator<Key,Value> iter) {
 +      current = false;
 +      this.iter = iter;
++
++      if (iflag != null)
++        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
++
 +      for (FileDataSource fds : deepCopies) {
 +        fds.current = false;
 +        fds.iter = iter.deepCopy(fds.env);
 +      }
 +    }
++
++    @Override
++    public void setInterruptFlag(AtomicBoolean flag) {
++      this.iflag = flag;
++    }
 +    
 +  }
 +  
 +  public class ScanFileManager {
 +    
 +    private ArrayList<FileDataSource> dataSources;
 +    private ArrayList<FileSKVIterator> tabletReservedReaders;
 +    private KeyExtent tablet;
 +    private boolean continueOnFailure;
 +    
 +    ScanFileManager(KeyExtent tablet) {
 +      tabletReservedReaders = new ArrayList<FileSKVIterator>();
 +      dataSources = new ArrayList<FileDataSource>();
 +      this.tablet = tablet;
 +      
 +      continueOnFailure = conf.getTableConfiguration(tablet).getBoolean(Property.TABLE_FAILURES_IGNORE);
 +      
 +      if (tablet.isMeta()) {
 +        continueOnFailure = false;
 +      }
 +    }
 +    
 +    private List<FileSKVIterator> openFileRefs(Collection<FileRef> files) throws TooManyFilesException, IOException {
 +      List<String> strings = new ArrayList<String>(files.size());
 +      for (FileRef ref : files)
 +        strings.add(ref.path().toString());
 +      return openFiles(strings);
 +    }
 +    
 +    private List<FileSKVIterator> openFiles(Collection<String> files) throws TooManyFilesException, IOException {
 +      // one tablet can not open more than maxOpen files, otherwise it could get stuck
 +      // forever waiting on itself to release files
 +      
 +      if (tabletReservedReaders.size() + files.size() >= maxOpen) {
 +        throw new TooManyFilesException("Request to open files would exceed max open files reservedReaders.size()=" + tabletReservedReaders.size()
 +            + " files.size()=" + files.size() + " maxOpen=" + maxOpen + " tablet = " + tablet);
 +      }
 +      
 +      List<FileSKVIterator> newlyReservedReaders = reserveReaders(tablet.getTableId(), files, continueOnFailure);
 +      
 +      tabletReservedReaders.addAll(newlyReservedReaders);
 +      return newlyReservedReaders;
 +    }
 +    
 +    synchronized List<InterruptibleIterator> openFiles(Map<FileRef,DataFileValue> files, boolean detachable) throws IOException {
 +      
 +      List<FileSKVIterator> newlyReservedReaders = openFileRefs(files.keySet());
 +      
 +      ArrayList<InterruptibleIterator> iters = new ArrayList<InterruptibleIterator>();
 +      
 +      for (FileSKVIterator reader : newlyReservedReaders) {
 +        String filename = getReservedReadeFilename(reader);
 +        InterruptibleIterator iter;
 +        if (detachable) {
 +          FileDataSource fds = new FileDataSource(filename, reader);
 +          dataSources.add(fds);
 +          SourceSwitchingIterator ssi = new SourceSwitchingIterator(fds);
 +          iter = new ProblemReportingIterator(tablet.getTableId().toString(), filename, continueOnFailure, ssi);
 +        } else {
 +          iter = new ProblemReportingIterator(tablet.getTableId().toString(), filename, continueOnFailure, reader);
 +        }
 +        DataFileValue value = files.get(new FileRef(filename));
 +        if (value.isTimeSet()) {
 +          iter = new TimeSettingIterator(iter, value.getTime());
 +        }
 +        
 +        iters.add(iter);
 +      }
 +      
 +      return iters;
 +    }
 +    
 +    synchronized void detach() {
 +      
 +      releaseReaders(tabletReservedReaders, false);
 +      tabletReservedReaders.clear();
 +      
 +      for (FileDataSource fds : dataSources)
 +        fds.unsetIterator();
 +    }
 +    
 +    synchronized void reattach() throws IOException {
 +      if (tabletReservedReaders.size() != 0)
 +        throw new IllegalStateException();
 +      
 +      Collection<String> files = new ArrayList<String>();
 +      for (FileDataSource fds : dataSources)
 +        files.add(fds.file);
 +      
 +      List<FileSKVIterator> newlyReservedReaders = openFiles(files);
 +      Map<String,List<FileSKVIterator>> map = new HashMap<String,List<FileSKVIterator>>();
 +      for (FileSKVIterator reader : newlyReservedReaders) {
 +        String fileName = getReservedReadeFilename(reader);
 +        List<FileSKVIterator> list = map.get(fileName);
 +        if (list == null) {
 +          list = new LinkedList<FileSKVIterator>();
 +          map.put(fileName, list);
 +        }
 +        
 +        list.add(reader);
 +      }
 +      
 +      for (FileDataSource fds : dataSources) {
 +        FileSKVIterator reader = map.get(fds.file).remove(0);
 +        fds.setIterator(reader);
 +      }
 +    }
 +    
 +    synchronized void releaseOpenFiles(boolean sawIOException) {
 +      releaseReaders(tabletReservedReaders, sawIOException);
 +      tabletReservedReaders.clear();
 +      dataSources.clear();
 +    }
 +    
 +    synchronized int getNumOpenFiles() {
 +      return tabletReservedReaders.size();
 +    }
 +  }
 +  
 +  public ScanFileManager newScanFileManager(KeyExtent tablet) {
 +    return new ScanFileManager(tablet);
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/d54e0fd8/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
index 5f6d9ce,0000000..2e15767
mode 100644,000000..100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
@@@ -1,772 -1,0 +1,783 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.tserver;
 +
 +import java.io.IOException;
 +import java.io.Serializable;
 +import java.util.ArrayList;
 +import java.util.Collection;
 +import java.util.Collections;
 +import java.util.Comparator;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.Iterator;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Map.Entry;
 +import java.util.Set;
 +import java.util.SortedMap;
 +import java.util.UUID;
 +import java.util.concurrent.ConcurrentSkipListMap;
 +import java.util.concurrent.atomic.AtomicBoolean;
 +import java.util.concurrent.atomic.AtomicInteger;
 +import java.util.concurrent.atomic.AtomicLong;
 +
 +import org.apache.accumulo.core.conf.AccumuloConfiguration;
 +import org.apache.accumulo.core.conf.Property;
 +import org.apache.accumulo.core.data.ByteSequence;
 +import org.apache.accumulo.core.data.ColumnUpdate;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.Mutation;
 +import org.apache.accumulo.core.data.Range;
 +import org.apache.accumulo.core.data.Value;
 +import org.apache.accumulo.core.file.FileSKVIterator;
 +import org.apache.accumulo.core.file.FileSKVWriter;
 +import org.apache.accumulo.core.file.rfile.RFile;
 +import org.apache.accumulo.core.file.rfile.RFileOperations;
 +import org.apache.accumulo.core.iterators.IteratorEnvironment;
 +import org.apache.accumulo.core.iterators.SkippingIterator;
 +import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 +import org.apache.accumulo.core.iterators.SortedMapIterator;
 +import org.apache.accumulo.core.iterators.WrappingIterator;
 +import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
 +import org.apache.accumulo.core.iterators.system.LocalityGroupIterator;
 +import org.apache.accumulo.core.iterators.system.LocalityGroupIterator.LocalityGroup;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
 +import org.apache.accumulo.core.util.CachedConfiguration;
 +import org.apache.accumulo.core.util.LocalityGroupUtil;
 +import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
 +import org.apache.accumulo.core.util.LocalityGroupUtil.Partitioner;
 +import org.apache.accumulo.core.util.UtilWaitThread;
 +import org.apache.accumulo.server.conf.ServerConfiguration;
 +import org.apache.accumulo.server.trace.TraceFileSystem;
 +import org.apache.commons.lang.mutable.MutableLong;
 +import org.apache.hadoop.conf.Configuration;
 +import org.apache.hadoop.fs.FileSystem;
 +import org.apache.hadoop.fs.Path;
 +import org.apache.log4j.Logger;
 +
 +class MemKeyComparator implements Comparator<Key>, Serializable {
 +  
 +  private static final long serialVersionUID = 1L;
 +
 +  @Override
 +  public int compare(Key k1, Key k2) {
 +    int cmp = k1.compareTo(k2);
 +    
 +    if (cmp == 0) {
 +      if (k1 instanceof MemKey)
 +        if (k2 instanceof MemKey)
 +          cmp = ((MemKey) k2).kvCount - ((MemKey) k1).kvCount;
 +        else
 +          cmp = 1;
 +      else if (k2 instanceof MemKey)
 +        cmp = -1;
 +    }
 +    
 +    return cmp;
 +  }
 +}
 +
 +class PartialMutationSkippingIterator extends SkippingIterator implements InterruptibleIterator {
 +  
 +  int kvCount;
 +  
 +  public PartialMutationSkippingIterator(SortedKeyValueIterator<Key,Value> source, int maxKVCount) {
 +    setSource(source);
 +    this.kvCount = maxKVCount;
 +  }
 +  
 +  @Override
 +  protected void consume() throws IOException {
 +    while (getSource().hasTop() && ((MemKey) getSource().getTopKey()).kvCount > kvCount)
 +      getSource().next();
 +  }
 +  
 +  @Override
 +  public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
 +    return new PartialMutationSkippingIterator(getSource().deepCopy(env), kvCount);
 +  }
 +  
 +  @Override
 +  public void setInterruptFlag(AtomicBoolean flag) {
 +    ((InterruptibleIterator) getSource()).setInterruptFlag(flag);
 +  }
 +  
 +}
 +
 +class MemKeyConversionIterator extends WrappingIterator implements InterruptibleIterator {
 +  MemKey currKey = null;
 +  Value currVal = null;
 +
 +  public MemKeyConversionIterator(SortedKeyValueIterator<Key,Value> source) {
 +    super();
 +    setSource(source);
 +  }
 +
 +  @Override
 +  public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
 +    return new MemKeyConversionIterator(getSource().deepCopy(env));
 +  }
 +  
 +  @Override
 +  public Key getTopKey() {
 +    return currKey;
 +  }
 +  
 +  @Override
 +  public Value getTopValue() {
 +    return currVal;
 +  }
 +  
 +  private void getTopKeyVal() {
 +    Key k = super.getTopKey();
 +    Value v = super.getTopValue();
 +    if (k instanceof MemKey || k == null) {
 +      currKey = (MemKey) k;
 +      currVal = v;
 +      return;
 +    }
 +    currVal = new Value(v);
 +    int mc = MemValue.splitKVCount(currVal);
 +    currKey = new MemKey(k, mc);
 +
 +  }
 +  
 +  public void next() throws IOException {
 +    super.next();
 +    if (hasTop())
 +      getTopKeyVal();
 +  }
 +
 +  public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
 +    super.seek(range, columnFamilies, inclusive);
 +    
 +    if (hasTop())
 +      getTopKeyVal();
 +
 +    Key k = range.getStartKey();
 +    if (k instanceof MemKey && hasTop()) {
 +      while (hasTop() && currKey.compareTo(k) < 0)
 +        next();
 +    }
 +  }
 +
 +  @Override
 +  public void setInterruptFlag(AtomicBoolean flag) {
 +    ((InterruptibleIterator) getSource()).setInterruptFlag(flag);
 +  }
 +
 +}
 +
 +public class InMemoryMap {
 +  private SimpleMap map = null;
 +  
 +  private static final Logger log = Logger.getLogger(InMemoryMap.class);
 +  
 +  private volatile String memDumpFile = null;
 +  private final String memDumpDir;
 +
 +  private Map<String,Set<ByteSequence>> lggroups;
 +  
 +  public InMemoryMap(boolean useNativeMap, String memDumpDir) {
 +    this(new HashMap<String,Set<ByteSequence>>(), useNativeMap, memDumpDir);
 +  }
 +
 +  public InMemoryMap(Map<String,Set<ByteSequence>> lggroups, boolean useNativeMap, String memDumpDir) {
 +    this.memDumpDir = memDumpDir;
 +    this.lggroups = lggroups;
 +    
 +    if (lggroups.size() == 0)
 +      map = newMap(useNativeMap);
 +    else
 +      map = new LocalityGroupMap(lggroups, useNativeMap);
 +  }
 +  
 +  public InMemoryMap(AccumuloConfiguration config) throws LocalityGroupConfigurationError {
 +    this(LocalityGroupUtil.getLocalityGroups(config), config.getBoolean(Property.TSERV_NATIVEMAP_ENABLED), config.get(Property.TSERV_MEMDUMP_DIR));
 +  }
 +  
 +  private static SimpleMap newMap(boolean useNativeMap) {
 +    if (useNativeMap && NativeMap.isLoaded()) {
 +      try {
 +        return new NativeMapWrapper();
 +      } catch (Throwable t) {
 +        log.error("Failed to create native map", t);
 +      }
 +    }
 +    
 +    return new DefaultMap();
 +  }
 +  
 +  private interface SimpleMap {
 +    Value get(Key key);
 +    
 +    Iterator<Entry<Key,Value>> iterator(Key startKey);
 +    
 +    int size();
 +    
 +    InterruptibleIterator skvIterator();
 +    
 +    void delete();
 +    
 +    long getMemoryUsed();
 +    
 +    void mutate(List<Mutation> mutations, int kvCount);
 +  }
 +  
 +  private static class LocalityGroupMap implements SimpleMap {
 +    
 +    private Map<ByteSequence,MutableLong> groupFams[];
 +    
 +    // the last map in the array is the default locality group
 +    private SimpleMap maps[];
 +    private Partitioner partitioner;
 +    private List<Mutation>[] partitioned;
 +    private Set<ByteSequence> nonDefaultColumnFamilies;
 +    
 +    @SuppressWarnings("unchecked")
 +    LocalityGroupMap(Map<String,Set<ByteSequence>> groups, boolean useNativeMap) {
 +      this.groupFams = new Map[groups.size()];
 +      this.maps = new SimpleMap[groups.size() + 1];
 +      this.partitioned = new List[groups.size() + 1];
 +      this.nonDefaultColumnFamilies = new HashSet<ByteSequence>();
 +      
 +      for (int i = 0; i < maps.length; i++) {
 +        maps[i] = newMap(useNativeMap);
 +      }
 +
 +      int count = 0;
 +      for (Set<ByteSequence> cfset : groups.values()) {
 +        HashMap<ByteSequence,MutableLong> map = new HashMap<ByteSequence,MutableLong>();
 +        for (ByteSequence bs : cfset)
 +          map.put(bs, new MutableLong(1));
 +        this.groupFams[count++] = map;
 +        nonDefaultColumnFamilies.addAll(cfset);
 +      }
 +      
 +      partitioner = new LocalityGroupUtil.Partitioner(this.groupFams);
 +      
 +      for (int i = 0; i < partitioned.length; i++) {
 +        partitioned[i] = new ArrayList<Mutation>();
 +      }
 +    }
 +
 +    @Override
 +    public Value get(Key key) {
 +      throw new UnsupportedOperationException();
 +    }
 +    
 +    @Override
 +    public Iterator<Entry<Key,Value>> iterator(Key startKey) {
 +      throw new UnsupportedOperationException();
 +    }
 +    
 +    @Override
 +    public int size() {
 +      int sum = 0;
 +      for (SimpleMap map : maps)
 +        sum += map.size();
 +      return sum;
 +    }
 +    
 +    @Override
 +    public InterruptibleIterator skvIterator() {
 +      LocalityGroup groups[] = new LocalityGroup[maps.length];
 +      for (int i = 0; i < groups.length; i++) {
 +        if (i < groupFams.length)
 +          groups[i] = new LocalityGroup(maps[i].skvIterator(), groupFams[i], false);
 +        else
 +          groups[i] = new LocalityGroup(maps[i].skvIterator(), null, true);
 +      }
 +
 +
 +      return new LocalityGroupIterator(groups, nonDefaultColumnFamilies);
 +    }
 +    
 +    @Override
 +    public void delete() {
 +      for (SimpleMap map : maps)
 +        map.delete();
 +    }
 +    
 +    @Override
 +    public long getMemoryUsed() {
 +      long sum = 0;
 +      for (SimpleMap map : maps)
 +        sum += map.getMemoryUsed();
 +      return sum;
 +    }
 +    
 +    // Partitions each mutation's column updates among the locality-group lists
 +    // and applies each non-empty partition to its corresponding map. kvCount is
 +    // the first key/value id for this batch and is advanced as groups are written
 +    // so every entry receives a distinct id.
 +    @Override
 +    public synchronized void mutate(List<Mutation> mutations, int kvCount) {
 +      // this method is synchronized because it reuses objects to avoid allocation,
 +      // currently, the method that calls this is synchronized so there is no
 +      // loss in parallelism.... synchronization was added here for future proofing
 +      
 +      try{
 +        partitioner.partition(mutations, partitioned);
 +        
 +        for (int i = 0; i < partitioned.length; i++) {
 +          if (partitioned[i].size() > 0) {
 +            maps[i].mutate(partitioned[i], kvCount);
 +            // advance the running kv id past the updates just written
 +            for (Mutation m : partitioned[i])
 +              kvCount += m.getUpdates().size();
 +          }
 +        }
 +      } finally {
 +        // clear immediately so mutations can be garbage collected
 +        for (List<Mutation> list : partitioned) {
 +          list.clear();
 +        }
 +      }
 +    }
 +    
 +  }
 +
 +  // On-heap SimpleMap implementation backed by a ConcurrentSkipListMap ordered
 +  // with MemKeyComparator. Keys are MemKeys, which carry a per-update kv count
 +  // used to hide partially written mutations from concurrent readers.
 +  private static class DefaultMap implements SimpleMap {
 +    private ConcurrentSkipListMap<Key,Value> map = new ConcurrentSkipListMap<Key,Value>(new MemKeyComparator());
 +    private AtomicLong bytesInMemory = new AtomicLong();
 +    private AtomicInteger size = new AtomicInteger();
 +    
 +    // Inserts one entry, tracking its memory footprint and the entry count.
 +    public void put(Key key, Value value) {
 +      // Always a MemKey, so account for the kvCount int
 +      bytesInMemory.addAndGet(key.getLength() + 4);
 +      bytesInMemory.addAndGet(value.getSize());
 +      // only count distinct keys toward the entry count
 +      if (map.put(key, value) == null)
 +        size.incrementAndGet();
 +    }
 +    
 +    public Value get(Key key) {
 +      return map.get(key);
 +    }
 +    
 +    // Iterates entries at or after startKey in sorted order.
 +    // NOTE(review): unlike skvIterator(), this does not guard against a
 +    // deleted (null) map -- confirm callers never use it after delete().
 +    public Iterator<Entry<Key,Value>> iterator(Key startKey) {
 +      Key lk = new Key(startKey);
 +      SortedMap<Key,Value> tm = map.tailMap(lk);
 +      return tm.entrySet().iterator();
 +    }
 +    
 +    public int size() {
 +      return size.get();
 +    }
 +    
 +    // Synchronized with delete() so an iterator is never created over a
 +    // map that has already been released.
 +    public synchronized InterruptibleIterator skvIterator() {
 +      if (map == null)
 +        throw new IllegalStateException();
 +      
 +      return new SortedMapIterator(map);
 +    }
 +    
 +    public synchronized void delete() {
 +      map = null;
 +    }
 +    
 +    public long getOverheadPerEntry() {
 +      // all of the java objects that are used to hold the
 +      // data and make it searchable have overhead... this
 +      // overhead is estimated using test.EstimateInMemMapOverhead
 +      // and is in bytes.. the estimates were obtained by running
 +      // java 6_16 in 64 bit server mode
 +      
 +      return 200;
 +    }
 +    
 +    // Converts each column update into a MemKey (assigning it the next kv
 +    // count) and inserts it.
 +    @Override
 +    public void mutate(List<Mutation> mutations, int kvCount) {
 +      for (Mutation m : mutations) {
 +        for (ColumnUpdate cvp : m.getUpdates()) {
 +          Key newKey = new MemKey(m.getRow(), cvp.getColumnFamily(), cvp.getColumnQualifier(), cvp.getColumnVisibility(), cvp.getTimestamp(), cvp.isDeleted(),
 +              false, kvCount++);
 +          Value value = new Value(cvp.getValue());
 +          put(newKey, value);
 +        }
 +      }
 +    }
 +    
 +    // Raw key/value bytes plus a fixed per-entry overhead estimate.
 +    @Override
 +    public long getMemoryUsed() {
 +      return bytesInMemory.get() + (size() * getOverheadPerEntry());
 +    }
 +  }
 +  
 +  // SimpleMap implementation that delegates all operations to a NativeMap,
 +  // keeping entries outside the Java heap.
 +  private static class NativeMapWrapper implements SimpleMap {
 +    private NativeMap nativeMap;
 +    
 +    NativeMapWrapper() {
 +      nativeMap = new NativeMap();
 +    }
 +    
 +    public Value get(Key key) {
 +      return nativeMap.get(key);
 +    }
 +    
 +    public Iterator<Entry<Key,Value>> iterator(Key startKey) {
 +      return nativeMap.iterator(startKey);
 +    }
 +    
 +    public int size() {
 +      return nativeMap.size();
 +    }
 +    
 +    public InterruptibleIterator skvIterator() {
 +      return (InterruptibleIterator) nativeMap.skvIterator();
 +    }
 +    
 +    // Frees the native map's off-heap memory.
 +    public void delete() {
 +      nativeMap.delete();
 +    }
 +    
 +    public long getMemoryUsed() {
 +      return nativeMap.getMemoryUsed();
 +    }
 +    
 +    @Override
 +    public void mutate(List<Mutation> mutations, int kvCount) {
 +      nativeMap.mutate(mutations, kvCount);
 +    }
 +  }
 +  
 +  private AtomicInteger nextKVCount = new AtomicInteger(1);
 +  private AtomicInteger kvCount = new AtomicInteger(0);
 +
 +  private Object writeSerializer = new Object();
 +  
 +  /**
 +   * Applies changes to a row in the InMemoryMap
 +   * 
 +   */
 +  public void mutate(List<Mutation> mutations) {
 +    int numKVs = 0;
 +    for (int i = 0; i < mutations.size(); i++)
 +      numKVs += mutations.get(i).size();
 +    
 +    // Can not update mutationCount while writes that started before
 +    // are in progress, this would cause partial mutations to be seen.
 +    // Also, can not continue until mutation count is updated, because
 +    // a read may not see a successful write. Therefore writes must
 +    // wait for writes that started before to finish.
 +    //
 +    // using separate lock from this map, to allow read/write in parallel
 +    synchronized (writeSerializer ) {
 +      int kv = nextKVCount.getAndAdd(numKVs);
 +      try {
 +        map.mutate(mutations, kv);
 +      } finally {
 +        kvCount.set(kv + numKVs - 1);
 +      }
 +    }
 +  }
 +  
 +  /**
 +   * Returns a long representing the size of the InMemoryMap
 +   * 
 +   * @return bytesInMemory
 +   */
 +  public synchronized long estimatedSizeInBytes() {
 +    if (map == null)
 +      return 0;
 +    
 +    return map.getMemoryUsed();
 +  }
 +  
 +  // Iterates entries at or after startKey, directly from the backing map.
 +  Iterator<Map.Entry<Key,Value>> iterator(Key startKey) {
 +    return map.iterator(startKey);
 +  }
 +  
 +  // Number of key/value entries currently held in the map.
 +  public long getNumEntries() {
 +    return map.size();
 +  }
 +  
 +  private final Set<MemoryIterator> activeIters = Collections.synchronizedSet(new HashSet<MemoryIterator>());
 +  
 +  // DataSource that reads from the in-memory map until the map is dumped to a
 +  // local RFile (memDumpFile) during delete(); after that, scans switch to the
 +  // dump file. The interrupt flag (iflag) is pushed down into whichever source
 +  // is created here instead of into a deep copy, which does not support it.
 +  class MemoryDataSource implements DataSource {
 +    
 +    boolean switched = false;
 +    private InterruptibleIterator iter;
 +    private FileSKVIterator reader;
 +    private MemoryDataSource parent;
 +    private IteratorEnvironment env;
 ++    private AtomicBoolean iflag;
 +    
 +    MemoryDataSource() {
 -       this(null, false, null);
 ++      this(null, false, null, null);
 +    }
 +    
 -     public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env) {
 ++    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env, AtomicBoolean iflag) {
 +      this.parent = parent;
 +      this.switched = switched;
 +      this.env = env;
 ++      this.iflag = iflag;
 +    }
 +    
 +    // Current until the map has been dumped to disk; once switched, always current.
 +    @Override
 +    public boolean isCurrent() {
 +      if (switched)
 +        return true;
 +      else
 +        return memDumpFile == null;
 +    }
 +    
 +    @Override
 +    public DataSource getNewDataSource() {
 +      if (switched)
 +        throw new IllegalStateException();
 +      
 +      if (!isCurrent()) {
 +        switched = true;
 +        iter = null;
 +        try {
 +          // ensure files are referenced even if iterator was never seeked before
 +          iterator();
 +        } catch (IOException e) {
 +          // NOTE(review): the IOException cause is dropped here; consider
 +          // new RuntimeException(e) so the failure is diagnosable.
 +          throw new RuntimeException();
 +        }
 +      }
 +      
 +      return this;
 +    }
 +    
 +    // Lazily opens the memory-dump RFile, propagating the interrupt flag to it.
 +    private synchronized FileSKVIterator getReader() throws IOException {
 +      if (reader == null) {
 +        Configuration conf = CachedConfiguration.getInstance();
 +        FileSystem fs = TraceFileSystem.wrap(FileSystem.getLocal(conf));
 +        
 +        reader = new RFileOperations().openReader(memDumpFile, true, fs, conf, ServerConfiguration.getSiteConfiguration());
 ++        if (iflag != null)
 ++          reader.setInterruptFlag(iflag);
 +      }
 +
 +      return reader;
 +    }
 +
 +    @Override
 +    public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
 +      if (iter == null)
 -         if (!switched)
 ++        if (!switched) {
 +          iter = map.skvIterator();
 ++          if (iflag != null)
 ++            iter.setInterruptFlag(iflag);
 ++        } else {
 +          if (parent == null)
 +            iter = new MemKeyConversionIterator(getReader());
 +          else
 +            synchronized (parent) {
 +              // synchronize deep copy operation on parent; this prevents multiple threads from deep copying the rfile shared from parent. It's possible that the
 +              // thread deleting an InMemoryMap and a scan thread could be switching different deep copies
 +              iter = new MemKeyConversionIterator(parent.getReader().deepCopy(env));
 +            }
 +        }
 +      
 +      return iter;
 +    }
 +    
 +    // Deep copies share the parent's reader (and interrupt flag) rather than
 +    // reopening the dump file.
 +    @Override
 +    public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
 -       return new MemoryDataSource(parent == null ? this : parent, switched, env);
 ++      return new MemoryDataSource(parent == null ? this : parent, switched, env, iflag);
 ++    }
 ++
 ++    @Override
 ++    public void setInterruptFlag(AtomicBoolean flag) {
 ++      this.iflag = flag;
 +    }
 +    
 +  }
 +  
 +  // Iterator handed out to scans over this InMemoryMap. Tracks a shared closed
 +  // flag (shared across deep copies) and can be switched to the on-disk dump
 +  // file via its SourceSwitchingIterator when the map is deleted.
 +  class MemoryIterator extends WrappingIterator implements InterruptibleIterator {
 +    
 +    private AtomicBoolean closed;
 +    private SourceSwitchingIterator ssi;
 +    private MemoryDataSource mds;
 +    
 +    // Guards against use after close().
 +    protected SortedKeyValueIterator<Key,Value> getSource() {
 +      if (closed.get())
 +        throw new IllegalStateException("Memory iterator is closed");
 +      return super.getSource();
 +    }
 +    
 +    private MemoryIterator(InterruptibleIterator source) {
 +      this(source, new AtomicBoolean(false));
 +    }
 +    
 +    private MemoryIterator(SortedKeyValueIterator<Key,Value> source, AtomicBoolean closed) {
 +      setSource(source);
 +      this.closed = closed;
 +    }
 +    
 +    // Deep copies share the closed flag, so closing one closes all.
 +    public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
 +      return new MemoryIterator(getSource().deepCopy(env), closed);
 +    }
 +    
 +    // Marks the iterator closed, closes the dump-file reader if one was
 +    // opened, and deregisters from activeIters so delete() can proceed.
 +    public void close() {
 +      
 +      synchronized (this) {
 +        if (closed.compareAndSet(false, true)) {
 +          try {
 +            if (mds.reader != null)
 +              mds.reader.close();
 +          } catch (IOException e) {
 +            log.warn(e, e);
 +          }
 +        }
 +      }
 +      
 +      // remove outside of sync to avoid deadlock
 +      activeIters.remove(this);
 +    }
 +    
 +    // Forces a switch to the dump file; returns false if already closed.
 +    private synchronized boolean switchNow() throws IOException {
 +      if (closed.get())
 +        return false;
 +      
 +      ssi.switchNow();
 +      return true;
 +    }
 +    
 +    @Override
 +    public void setInterruptFlag(AtomicBoolean flag) {
 +      ((InterruptibleIterator) getSource()).setInterruptFlag(flag);
 +    }
 +    
 +    private void setSSI(SourceSwitchingIterator ssi) {
 +      this.ssi = ssi;
 +    }
 +    
 +    public void setMDS(MemoryDataSource mds) {
 +      this.mds = mds;
 +    }
 +    
 +  }
 +  
 +  // Creates a scan iterator over this map, registered in activeIters so that
 +  // delete() can wait for (or switch) outstanding scans.
 +  public synchronized MemoryIterator skvIterator() {
 +    if (map == null)
 +      throw new NullPointerException();
 +    
 +    if (deleted)
 +      throw new IllegalStateException("Can not obtain iterator after map deleted");
 +    
 +    // snapshot the highest fully written kv id; the skipping iterator hides
 +    // entries from mutations still in flight
 +    int mc = kvCount.get();
 +    MemoryDataSource mds = new MemoryDataSource();
 +    // NOTE(review): ssi wraps a second, separate MemoryDataSource rather than
 +    // mds; mds is only stored via setMDS() for close() to use. Confirm the two
 +    // instances are intentionally distinct -- close() may not see ssi's reader.
 +    SourceSwitchingIterator ssi = new SourceSwitchingIterator(new MemoryDataSource());
 +    MemoryIterator mi = new MemoryIterator(new PartialMutationSkippingIterator(ssi, mc));
 +    mi.setSSI(ssi);
 +    mi.setMDS(mds);
 +    activeIters.add(mi);
 +    return mi;
 +  }
 +  
 +  // Iterator used for compaction. Requires quiescence: every reserved kv id
 +  // must have been published (no writes in progress).
 +  public SortedKeyValueIterator<Key,Value> compactionIterator() {
 +    
 +    if (nextKVCount.get() - 1 != kvCount.get())
 +      throw new IllegalStateException("Memory map in unexpected state : nextKVCount = " + nextKVCount.get() + " kvCount = "
 +          + kvCount.get());
 +    
 +    return map.skvIterator();
 +  }
 +  
 +  private boolean deleted = false;
 +  
 +  // Deletes this map, waiting up to waitTime ms for active scans to finish.
 +  // If scans are still active after the wait, the map is dumped to a temporary
 +  // local RFile and the remaining scans are switched to read from that file
 +  // before the in-memory data is released.
 +  public void delete(long waitTime) {
 +    
 +    synchronized (this) {
 +      if (deleted)
 +        throw new IllegalStateException("Double delete");
 +      
 +      deleted = true;
 +    }
 +    
 +    long t1 = System.currentTimeMillis();
 +    
 +    // give active iterators a chance to close on their own
 +    while (activeIters.size() > 0 && System.currentTimeMillis() - t1 < waitTime) {
 +      UtilWaitThread.sleep(50);
 +    }
 +    
 +    if (activeIters.size() > 0) {
 +      // dump memmap exactly as is to a tmp file on disk, and switch scans to that temp file
 +      try {
 +        Configuration conf = CachedConfiguration.getInstance();
 +        FileSystem fs = TraceFileSystem.wrap(FileSystem.getLocal(conf));
 +        
 +        String tmpFile = memDumpDir + "/memDump" + UUID.randomUUID() + "." + RFile.EXTENSION;
 +        
 +        Configuration newConf = new Configuration(conf);
 +        newConf.setInt("io.seqfile.compress.blocksize", 100000);
 +        
 +        FileSKVWriter out = new RFileOperations().openWriter(tmpFile, fs, newConf, ServerConfiguration.getSiteConfiguration());
 +        
 +        InterruptibleIterator iter = map.skvIterator();
 +       
 +        HashSet<ByteSequence> allfams= new HashSet<ByteSequence>();
 +        
 +        // write each configured locality group, remembering its families so the
 +        // default group below can exclude them
 +        for(Entry<String, Set<ByteSequence>> entry : lggroups.entrySet()){
 +          allfams.addAll(entry.getValue());
 +          out.startNewLocalityGroup(entry.getKey(), entry.getValue());
 +          iter.seek(new Range(), entry.getValue(), true);
 +          dumpLocalityGroup(out, iter);
 +        }
 +        
 +        // default group: everything not in a named locality group
 +        out.startDefaultLocalityGroup();
 +        iter.seek(new Range(), allfams, false);
 +       
 +        dumpLocalityGroup(out, iter);
 +        
 +        out.close();
 +        
 +        log.debug("Created mem dump file " + tmpFile);
 +        
 +        memDumpFile = tmpFile;
 +        
 +        synchronized (activeIters) {
 +          for (MemoryIterator mi : activeIters) {
 +            mi.switchNow();
 +          }
 +        }
 +        
 +        // rely on unix behavior that file will be deleted when last
 +        // reader closes it
 +        fs.delete(new Path(memDumpFile), true);
 +        
 +      } catch (IOException ioe) {
 +        log.error("Failed to create mem dump file ", ioe);
 +        
 +        // fall back to waiting indefinitely for scans to finish
 +        while (activeIters.size() > 0) {
 +          UtilWaitThread.sleep(100);
 +        }
 +      }
 +      
 +    }
 +    
 +    SimpleMap tmpMap = map;
 +    
 +    // null the field under lock so readers see the map disappear atomically,
 +    // then release its memory outside the lock
 +    synchronized (this) {
 +      map = null;
 +    }
 +    
 +    tmpMap.delete();
 +  }
 +
 +  // Writes the iterator's current locality group to the dump file. Stops early
 +  // if all active iterators close mid-dump, since the dump is then unnecessary.
 +  private void dumpLocalityGroup(FileSKVWriter out, InterruptibleIterator iter) throws IOException {
 +    while (iter.hasTop() && activeIters.size() > 0) {
 +      // RFile does not support MemKey, so we move the kv count into the value only for the RFile.
 +      // There is no need to change the MemKey to a normal key because the kvCount info gets lost when it is written
 +      Value newValue = new MemValue(iter.getTopValue(), ((MemKey) iter.getTopKey()).kvCount);
 +      out.append(iter.getTopKey(), newValue);
 +      iter.next();
 +
 +    }
 +  }
 +}


[05/10] Merge branch '1.5.2-SNAPSHOT' into 1.6.1-SNAPSHOT

Posted by el...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/d54e0fd8/server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
index f9fdacb,0000000..703d1ae
mode 100644,000000..100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
@@@ -1,3833 -1,0 +1,3838 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.tserver;
 +
 +import java.io.ByteArrayInputStream;
 +import java.io.DataInputStream;
 +import java.io.FileNotFoundException;
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.Arrays;
 +import java.util.Collection;
 +import java.util.Collections;
 +import java.util.Comparator;
 +import java.util.EnumSet;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.Iterator;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Map.Entry;
 +import java.util.PriorityQueue;
 +import java.util.Set;
 +import java.util.SortedMap;
 +import java.util.TreeMap;
 +import java.util.TreeSet;
 +import java.util.concurrent.atomic.AtomicBoolean;
 +import java.util.concurrent.atomic.AtomicLong;
 +import java.util.concurrent.atomic.AtomicReference;
 +import java.util.concurrent.locks.ReentrantLock;
 +
 +import org.apache.accumulo.core.Constants;
 +import org.apache.accumulo.core.client.Connector;
 +import org.apache.accumulo.core.client.IteratorSetting;
 +import org.apache.accumulo.core.client.impl.ScannerImpl;
 +import org.apache.accumulo.core.conf.AccumuloConfiguration;
 +import org.apache.accumulo.core.conf.ConfigurationCopy;
 +import org.apache.accumulo.core.conf.ConfigurationObserver;
 +import org.apache.accumulo.core.conf.Property;
 +import org.apache.accumulo.core.constraints.Violations;
 +import org.apache.accumulo.core.data.ByteSequence;
 +import org.apache.accumulo.core.data.Column;
 +import org.apache.accumulo.core.data.ColumnUpdate;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.KeyExtent;
 +import org.apache.accumulo.core.data.KeyValue;
 +import org.apache.accumulo.core.data.Mutation;
 +import org.apache.accumulo.core.data.Range;
 +import org.apache.accumulo.core.data.Value;
 +import org.apache.accumulo.core.data.thrift.IterInfo;
 +import org.apache.accumulo.core.data.thrift.MapFileInfo;
 +import org.apache.accumulo.core.file.FileOperations;
 +import org.apache.accumulo.core.file.FileSKVIterator;
 +import org.apache.accumulo.core.iterators.IterationInterruptedException;
 +import org.apache.accumulo.core.iterators.IteratorEnvironment;
 +import org.apache.accumulo.core.iterators.IteratorUtil;
 +import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
 +import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 +import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
 +import org.apache.accumulo.core.iterators.system.ColumnQualifierFilter;
 +import org.apache.accumulo.core.iterators.system.DeletingIterator;
 +import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
 +import org.apache.accumulo.core.iterators.system.MultiIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
 +import org.apache.accumulo.core.iterators.system.StatsIterator;
 +import org.apache.accumulo.core.iterators.system.VisibilityFilter;
 +import org.apache.accumulo.core.master.thrift.TabletLoadState;
 +import org.apache.accumulo.core.metadata.MetadataTable;
 +import org.apache.accumulo.core.metadata.RootTable;
 +import org.apache.accumulo.core.metadata.schema.DataFileValue;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily;
 +import org.apache.accumulo.core.security.Authorizations;
 +import org.apache.accumulo.core.security.ColumnVisibility;
 +import org.apache.accumulo.core.security.Credentials;
 +import org.apache.accumulo.core.tabletserver.log.LogEntry;
 +import org.apache.accumulo.core.util.CachedConfiguration;
 +import org.apache.accumulo.core.util.LocalityGroupUtil;
 +import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
 +import org.apache.accumulo.core.util.MapCounter;
 +import org.apache.accumulo.core.util.Pair;
 +import org.apache.accumulo.core.util.UtilWaitThread;
 +import org.apache.accumulo.fate.zookeeper.IZooReaderWriter;
 +import org.apache.accumulo.server.ServerConstants;
 +import org.apache.accumulo.server.client.HdfsZooInstance;
 +import org.apache.accumulo.server.conf.TableConfiguration;
 +import org.apache.accumulo.server.fs.FileRef;
 +import org.apache.accumulo.server.fs.VolumeManager;
 +import org.apache.accumulo.server.fs.VolumeManager.FileType;
 +import org.apache.accumulo.server.fs.VolumeManagerImpl;
 +import org.apache.accumulo.server.fs.VolumeUtil;
 +import org.apache.accumulo.server.fs.VolumeUtil.TabletFiles;
 +import org.apache.accumulo.server.master.state.TServerInstance;
 +import org.apache.accumulo.server.master.tableOps.CompactionIterators;
 +import org.apache.accumulo.server.problems.ProblemReport;
 +import org.apache.accumulo.server.problems.ProblemReports;
 +import org.apache.accumulo.server.problems.ProblemType;
 +import org.apache.accumulo.server.security.SystemCredentials;
 +import org.apache.accumulo.server.tablets.TabletTime;
 +import org.apache.accumulo.server.tablets.UniqueNameAllocator;
 +import org.apache.accumulo.server.util.FileUtil;
 +import org.apache.accumulo.server.util.MasterMetadataUtil;
 +import org.apache.accumulo.server.util.MetadataTableUtil;
 +import org.apache.accumulo.server.util.TabletOperations;
 +import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
 +import org.apache.accumulo.start.classloader.vfs.AccumuloVFSClassLoader;
 +import org.apache.accumulo.trace.instrument.Span;
 +import org.apache.accumulo.trace.instrument.Trace;
 +import org.apache.accumulo.tserver.Compactor.CompactionCanceledException;
 +import org.apache.accumulo.tserver.Compactor.CompactionEnv;
 +import org.apache.accumulo.tserver.FileManager.ScanFileManager;
 +import org.apache.accumulo.tserver.InMemoryMap.MemoryIterator;
 +import org.apache.accumulo.tserver.TabletServer.TservConstraintEnv;
 +import org.apache.accumulo.tserver.TabletServerResourceManager.TabletResourceManager;
 +import org.apache.accumulo.tserver.TabletStatsKeeper.Operation;
 +import org.apache.accumulo.tserver.compaction.CompactionPlan;
 +import org.apache.accumulo.tserver.compaction.CompactionStrategy;
 +import org.apache.accumulo.tserver.compaction.DefaultCompactionStrategy;
 +import org.apache.accumulo.tserver.compaction.MajorCompactionReason;
 +import org.apache.accumulo.tserver.compaction.MajorCompactionRequest;
 +import org.apache.accumulo.tserver.compaction.WriteParameters;
 +import org.apache.accumulo.tserver.constraints.ConstraintChecker;
 +import org.apache.accumulo.tserver.log.DfsLogger;
 +import org.apache.accumulo.tserver.log.MutationReceiver;
 +import org.apache.accumulo.tserver.mastermessage.TabletStatusMessage;
 +import org.apache.accumulo.tserver.metrics.TabletServerMinCMetrics;
 +import org.apache.commons.codec.DecoderException;
 +import org.apache.commons.codec.binary.Hex;
 +import org.apache.hadoop.conf.Configuration;
 +import org.apache.hadoop.fs.FileStatus;
 +import org.apache.hadoop.fs.FileSystem;
 +import org.apache.hadoop.fs.Path;
 +import org.apache.hadoop.io.Text;
 +import org.apache.log4j.Logger;
 +import org.apache.zookeeper.KeeperException;
 +import org.apache.zookeeper.KeeperException.NoNodeException;
 +
 +import com.google.common.annotations.VisibleForTesting;
 +
 +/*
 + * We need to be able to have the master tell a tabletServer to
 + * close this file, and the tablet server to handle all pending client reads
 + * before closing
 + * 
 + */
 +
 +/**
 + * 
 + * this class just provides an interface to read from a MapFile mostly takes care of reporting start and end keys
 + * 
 + * need this because a single row extent can have multiple columns this manages all the columns (each handled by a store) for a single row-extent
 + * 
 + * 
 + */
 +
 +public class Tablet {
 +
 +  enum MinorCompactionReason {
 +    USER, SYSTEM, CLOSE, RECOVERY
 +  }
 +
 +  // Associates a batch of commits with one in-memory map generation (and its
 +  // write-ahead log sequence number), tracking how many commits are in flight
 +  // so a minor compaction can wait for them to finish.
 +  public class CommitSession {
 +
 +    private int seq;
 +    private InMemoryMap memTable;
 +    private int commitsInProgress;
 +    private long maxCommittedTime = Long.MIN_VALUE;
 +
 +    private CommitSession(int seq, InMemoryMap imm) {
 +      this.seq = seq;
 +      this.memTable = imm;
 +      commitsInProgress = 0;
 +    }
 +
 +    public int getWALogSeq() {
 +      return seq;
 +    }
 +
 +    // Called when a commit finishes; wakes waiters when the count hits zero.
 +    private void decrementCommitsInProgress() {
 +      if (commitsInProgress < 1)
 +        throw new IllegalStateException("commitsInProgress = " + commitsInProgress);
 +
 +      commitsInProgress--;
 +      if (commitsInProgress == 0)
 +        Tablet.this.notifyAll();
 +    }
 +
 +    private void incrementCommitsInProgress() {
 +      if (commitsInProgress < 0)
 +        throw new IllegalStateException("commitsInProgress = " + commitsInProgress);
 +
 +      commitsInProgress++;
 +    }
 +
 +    // Blocks until every in-flight commit against this session completes.
 +    private void waitForCommitsToFinish() {
 +      while (commitsInProgress > 0) {
 +        try {
 +          Tablet.this.wait(50);
 +        } catch (InterruptedException e) {
 +          log.warn(e, e);
 +        }
 +      }
 +    }
 +
 +    public void abortCommit(List<Mutation> value) {
 +      Tablet.this.abortCommit(this, value);
 +    }
 +
 +    public void commit(List<Mutation> mutations) {
 +      Tablet.this.commit(this, mutations);
 +    }
 +
 +    public Tablet getTablet() {
 +      return Tablet.this;
 +    }
 +
 +    public boolean beginUpdatingLogsUsed(ArrayList<DfsLogger> copy, boolean mincFinish) {
 +      return Tablet.this.beginUpdatingLogsUsed(memTable, copy, mincFinish);
 +    }
 +
 +    public void finishUpdatingLogsUsed() {
 +      Tablet.this.finishUpdatingLogsUsed();
 +    }
 +
 +    public int getLogId() {
 +      return logId;
 +    }
 +
 +    public KeyExtent getExtent() {
 +      return extent;
 +    }
 +
 +    // Tracks the largest timestamp committed through this session.
 +    private void updateMaxCommittedTime(long time) {
 +      maxCommittedTime = Math.max(time, maxCommittedTime);
 +    }
 +
 +    private long getMaxCommittedTime() {
 +      if (maxCommittedTime == Long.MIN_VALUE)
 +        throw new IllegalStateException("Tried to read max committed time when it was never set");
 +      return maxCommittedTime;
 +    }
 +
 +  }
 +
 +  // Manages the tablet's in-memory maps across minor compactions: the active
 +  // memTable receiving writes, otherMemTable being minor-compacted, and
 +  // deletingMemTable being torn down after the compaction completes.
 +  private class TabletMemory {
 +    private InMemoryMap memTable;
 +    private InMemoryMap otherMemTable;
 +    private InMemoryMap deletingMemTable;
 +    private int nextSeq = 1;
 +    private CommitSession commitSession;
 +
 +    TabletMemory() {
 +      try {
 +        memTable = new InMemoryMap(acuTableConf);
 +      } catch (LocalityGroupConfigurationError e) {
 +        throw new RuntimeException(e);
 +      }
 +      commitSession = new CommitSession(nextSeq, memTable);
 +      // sequence numbers advance by 2 per generation
 +      nextSeq += 2;
 +    }
 +
 +    InMemoryMap getMemTable() {
 +      return memTable;
 +    }
 +
 +    InMemoryMap getMinCMemTable() {
 +      return otherMemTable;
 +    }
 +
 +    // Swaps in a fresh memTable and returns the old commit session so its
 +    // map can be minor-compacted. Only one minor compaction may be staged.
 +    CommitSession prepareForMinC() {
 +      if (otherMemTable != null) {
 +        throw new IllegalStateException();
 +      }
 +
 +      if (deletingMemTable != null) {
 +        throw new IllegalStateException();
 +      }
 +
 +      otherMemTable = memTable;
 +      try {
 +        memTable = new InMemoryMap(acuTableConf);
 +      } catch (LocalityGroupConfigurationError e) {
 +        throw new RuntimeException(e);
 +      }
 +
 +      CommitSession oldCommitSession = commitSession;
 +      commitSession = new CommitSession(nextSeq, memTable);
 +      nextSeq += 2;
 +
 +      tabletResources.updateMemoryUsageStats(memTable.estimatedSizeInBytes(), otherMemTable.estimatedSizeInBytes());
 +
 +      return oldCommitSession;
 +    }
 +
 +    // Moves the compacted map into the deleting state and wakes waiters.
 +    void finishedMinC() {
 +
 +      if (otherMemTable == null) {
 +        throw new IllegalStateException();
 +      }
 +
 +      if (deletingMemTable != null) {
 +        throw new IllegalStateException();
 +      }
 +
 +      deletingMemTable = otherMemTable;
 +
 +      otherMemTable = null;
 +      Tablet.this.notifyAll();
 +    }
 +
 +    // Releases the compacted map's memory (waiting up to 15s for scans),
 +    // then clears the deleting state under the tablet lock.
 +    void finalizeMinC() {
 +      try {
 +        deletingMemTable.delete(15000);
 +      } finally {
 +        synchronized (Tablet.this) {
 +          if (otherMemTable != null) {
 +            throw new IllegalStateException();
 +          }
 +
 +          if (deletingMemTable == null) {
 +            throw new IllegalStateException();
 +          }
 +
 +          deletingMemTable = null;
 +
 +          tabletResources.updateMemoryUsageStats(memTable.estimatedSizeInBytes(), 0);
 +        }
 +      }
 +    }
 +
 +    boolean memoryReservedForMinC() {
 +      return otherMemTable != null || deletingMemTable != null;
 +    }
 +
 +    // Blocks until no minor compaction holds a secondary map.
 +    void waitForMinC() {
 +      while (otherMemTable != null || deletingMemTable != null) {
 +        try {
 +          Tablet.this.wait(50);
 +        } catch (InterruptedException e) {
 +          log.warn(e, e);
 +        }
 +      }
 +    }
 +
 +    void mutate(CommitSession cm, List<Mutation> mutations) {
 +      cm.memTable.mutate(mutations);
 +    }
 +
 +    void updateMemoryUsageStats() {
 +      long other = 0;
 +      if (otherMemTable != null)
 +        other = otherMemTable.estimatedSizeInBytes();
 +      else if (deletingMemTable != null)
 +        other = deletingMemTable.estimatedSizeInBytes();
 +
 +      tabletResources.updateMemoryUsageStats(memTable.estimatedSizeInBytes(), other);
 +    }
 +
 +    // Iterators over the active map and, if present, the map being compacted.
 +    // Callers must return them via returnIterators().
 +    List<MemoryIterator> getIterators() {
 +      List<MemoryIterator> toReturn = new ArrayList<MemoryIterator>(2);
 +      toReturn.add(memTable.skvIterator());
 +      if (otherMemTable != null)
 +        toReturn.add(otherMemTable.skvIterator());
 +      return toReturn;
 +    }
 +
 +    void returnIterators(List<MemoryIterator> iters) {
 +      for (MemoryIterator iter : iters) {
 +        iter.close();
 +      }
 +    }
 +
 +    public long getNumEntries() {
 +      if (otherMemTable != null)
 +        return memTable.getNumEntries() + otherMemTable.getNumEntries();
 +      return memTable.getNumEntries();
 +    }
 +
 +    CommitSession getCommitSession() {
 +      return commitSession;
 +    }
 +  }
 +
 +  // In-memory map (memtable) holder for this tablet; swapped out during minor compactions.
 +  private TabletMemory tabletMemory;
 +
 +  private final TabletTime tabletTime;
 +  // Highest timestamp known to be persisted in the metadata table; guarded by timeLock.
 +  private long persistedTime;
 +  private final Object timeLock = new Object();
 +
 +  private final Path location; // absolute path of this tablets dir
 +  private TServerInstance lastLocation;
 +
 +  private Configuration conf;
 +  private VolumeManager fs;
 +
 +  private TableConfiguration acuTableConf;
 +
 +  // Set once the tablet dir has been verified/created; see checkTabletDir().
 +  private volatile boolean tableDirChecked = false;
 +
 +  // Incremented whenever the set of data sources changes so scans can detect staleness.
 +  private AtomicLong dataSourceDeletions = new AtomicLong(0);
 +  private Set<ScanDataSource> activeScans = new HashSet<ScanDataSource>();
 +
 +  // Close lifecycle flags, in order: closing -> closed -> closeComplete.
 +  private volatile boolean closing = false;
 +  private boolean closed = false;
 +  private boolean closeComplete = false;
 +
 +  private long lastFlushID = -1;
 +  private long lastCompactID = -1;
 +
 +  private KeyExtent extent;
 +
 +  private TabletResourceManager tabletResources;
 +  final private DatafileManager datafileManager;
 +  private volatile boolean majorCompactionInProgress = false;
 +  private volatile boolean majorCompactionWaitingToStart = false;
 +  private Set<MajorCompactionReason> majorCompactionQueued = Collections.synchronizedSet(EnumSet.noneOf(MajorCompactionReason.class));
 +  private volatile boolean minorCompactionInProgress = false;
 +  private volatile boolean minorCompactionWaitingToStart = false;
 +
 +  private boolean updatingFlushID = false;
 +
 +  private AtomicReference<ConstraintChecker> constraintChecker = new AtomicReference<ConstraintChecker>();
 +
 +  private final String tabletDirectory;
 +
 +  private int writesInProgress = 0;
 +
 +  private static final Logger log = Logger.getLogger(Tablet.class);
 +  public TabletStatsKeeper timer;
 +
 +  // Decayed rates (decay factor 0.2) plus raw counters, used for metrics reporting.
 +  private Rate queryRate = new Rate(0.2);
 +  private long queryCount = 0;
 +
 +  private Rate queryByteRate = new Rate(0.2);
 +  private long queryBytes = 0;
 +
 +  private Rate ingestRate = new Rate(0.2);
 +  private long ingestCount = 0;
 +
 +  private Rate ingestByteRate = new Rate(0.2);
 +  private long ingestBytes = 0;
 +
 +  private byte[] defaultSecurityLabel = new byte[0];
 +
 +  private long lastMinorCompactionFinishTime;
 +  private long lastMapFileImportTime;
 +
 +  private volatile long numEntries;
 +  private volatile long numEntriesInMemory;
 +
 +  // a count of the amount of data read by the iterators
 +  private AtomicLong scannedCount = new AtomicLong(0);
 +  private Rate scannedRate = new Rate(0.2);
 +
 +  private ConfigurationObserver configObserver;
 +
 +  private TabletServer tabletServer;
 +
 +  private final int logId;
 +  // ensure we only have one reader/writer of our bulk file notes at a time
 +  public final Object bulkFileImportLock = new Object();
 +
 +  // Returns the id used to correlate this tablet with its write-ahead log entries.
 +  public int getLogId() {
 +    return logId;
 +  }
 +
 +  // Thrown to operations (e.g. scans) that touch this tablet after it has been closed.
 +  public static class TabletClosedException extends RuntimeException {
 +    public TabletClosedException(Exception e) {
 +      super(e);
 +    }
 +
 +    public TabletClosedException() {
 +      super();
 +    }
 +
 +    private static final long serialVersionUID = 1L;
 +  }
 +
 +  // Allocates a new, cluster-unique file name (with the table's configured file extension)
 +  // under this tablet's directory, creating the directory first if necessary.
 +  FileRef getNextMapFilename(String prefix) throws IOException {
 +    String extension = FileOperations.getNewFileExtension(tabletServer.getTableConfiguration(extent));
 +    checkTabletDir();
 +    return new FileRef(location.toString() + "/" + prefix + UniqueNameAllocator.getInstance().getNextName() + "." + extension);
 +  }
 +
 +  // Ensures the tablet directory exists; the check runs at most once per Tablet instance.
 +  // tableDirChecked is volatile, so concurrent callers may both check, which is harmless.
 +  private void checkTabletDir() throws IOException {
 +    if (!tableDirChecked) {
 +      checkTabletDir(this.location);
 +      tableDirChecked = true;
 +    }
 +  }
 +
 +  // Creates tabletDir if it does not exist. A missing clone dir ("c-" prefix) is expected
 +  // and logged at debug; any other missing dir is surprising and logged as a warning.
 +  private void checkTabletDir(Path tabletDir) throws IOException {
 +
 +    FileStatus[] files = null;
 +    try {
 +      files = fs.listStatus(tabletDir);
 +    } catch (FileNotFoundException ex) {
 +      // ignored - files stays null, which triggers directory creation below
 +    }
 +
 +    if (files == null) {
 +      if (tabletDir.getName().startsWith("c-"))
 +        log.debug("Tablet " + extent + " had no dir, creating " + tabletDir); // its a clone dir...
 +      else
 +        log.warn("Tablet " + extent + " had no dir, creating " + tabletDir);
 +
 +      fs.mkdirs(tabletDir);
 +    }
 +  }
 +
 +  // Tracks this tablet's data files and their sizes, plus reservations placed on files by
 +  // scans and by minor/major compactions. Most state is guarded by Tablet.this.
 +  class DatafileManager {
 +    // access to datafilesizes needs to be synchronized: see CompactionRunner#getNumFiles
 +    final private Map<FileRef,DataFileValue> datafileSizes = Collections.synchronizedMap(new TreeMap<FileRef,DataFileValue>());
 +
 +    DatafileManager(SortedMap<FileRef,DataFileValue> datafileSizes) {
 +      for (Entry<FileRef,DataFileValue> datafiles : datafileSizes.entrySet())
 +        this.datafileSizes.put(datafiles.getKey(), datafiles.getValue());
 +    }
 +
 +    // File currently reserved as the merge target of a merging minor compaction, if any.
 +    FileRef mergingMinorCompactionFile = null;
 +    // Files removed from the tablet that must not be deleted until scans release them.
 +    Set<FileRef> filesToDeleteAfterScan = new HashSet<FileRef>();
 +    // Reservation id -> set of files reserved by that scan; see reserveFilesForScan().
 +    Map<Long,Set<FileRef>> scanFileReservations = new HashMap<Long,Set<FileRef>>();
 +    // Per-file count of outstanding scan reservations.
 +    MapCounter<FileRef> fileScanReferenceCounts = new MapCounter<FileRef>();
 +    long nextScanReservationId = 0;
 +    // When true, new scan reservations wait; set by waitForScansToFinish(blockNewScans=true).
 +    boolean reservationsBlocked = false;
 +
 +    // Files that are inputs to the major compaction currently in progress.
 +    Set<FileRef> majorCompactingFiles = new HashSet<FileRef>();
 +
 +    // Reserves the tablet's current set of data files for a scan, incrementing each file's
 +    // reference count so the files are not deleted while in use. Returns the reservation id
 +    // (to be passed to returnFilesForScan) and the reserved files with their sizes.
 +    Pair<Long,Map<FileRef,DataFileValue>> reserveFilesForScan() {
 +      synchronized (Tablet.this) {
 +
 +        // wait while another thread has blocked new reservations (e.g. root tablet MajC)
 +        while (reservationsBlocked) {
 +          try {
 +            Tablet.this.wait(50);
 +          } catch (InterruptedException e) {
 +            log.warn(e, e);
 +          }
 +        }
 +
 +        Set<FileRef> absFilePaths = new HashSet<FileRef>(datafileSizes.keySet());
 +
 +        long rid = nextScanReservationId++;
 +
 +        scanFileReservations.put(rid, absFilePaths);
 +
 +        Map<FileRef,DataFileValue> ret = new HashMap<FileRef,DataFileValue>();
 +
 +        for (FileRef path : absFilePaths) {
 +          fileScanReferenceCounts.increment(path, 1);
 +          ret.put(path, datafileSizes.get(path));
 +        }
 +
 +        return new Pair<Long,Map<FileRef,DataFileValue>>(rid, ret);
 +      }
 +    }
 +
 +    // Releases a scan's file reservation. Any file whose reference count drops to zero and
 +    // that was queued in filesToDeleteAfterScan has its scan reference removed from the
 +    // metadata table (outside the tablet lock).
 +    void returnFilesForScan(Long reservationId) {
 +
 +      final Set<FileRef> filesToDelete = new HashSet<FileRef>();
 +
 +      synchronized (Tablet.this) {
 +        Set<FileRef> absFilePaths = scanFileReservations.remove(reservationId);
 +
 +        if (absFilePaths == null)
 +          throw new IllegalArgumentException("Unknown scan reservation id " + reservationId);
 +
 +        boolean notify = false;
 +        for (FileRef path : absFilePaths) {
 +          long refCount = fileScanReferenceCounts.decrement(path, 1);
 +          if (refCount == 0) {
 +            if (filesToDeleteAfterScan.remove(path))
 +              filesToDelete.add(path);
 +            notify = true;
 +          } else if (refCount < 0)
 +            throw new IllegalStateException("Scan ref count for " + path + " is " + refCount);
 +        }
 +
 +        // wake threads in waitForScansToFinish() waiting on ref counts to reach zero
 +        if (notify)
 +          Tablet.this.notifyAll();
 +      }
 +
 +      if (filesToDelete.size() > 0) {
 +        log.debug("Removing scan refs from metadata " + extent + " " + filesToDelete);
 +        MetadataTableUtil.removeScanFiles(extent, filesToDelete, SystemCredentials.get(), tabletServer.getLock());
 +      }
 +    }
 +
 +    // For each file no longer part of the tablet: if no scan currently references it, remove
 +    // its scan ref from the metadata table now; otherwise defer removal until the last scan
 +    // releases it (handled in returnFilesForScan via filesToDeleteAfterScan).
 +    private void removeFilesAfterScan(Set<FileRef> scanFiles) {
 +      if (scanFiles.size() == 0)
 +        return;
 +
 +      Set<FileRef> filesToDelete = new HashSet<FileRef>();
 +
 +      synchronized (Tablet.this) {
 +        for (FileRef path : scanFiles) {
 +          if (fileScanReferenceCounts.get(path) == 0)
 +            filesToDelete.add(path);
 +          else
 +            filesToDeleteAfterScan.add(path);
 +        }
 +      }
 +
 +      if (filesToDelete.size() > 0) {
 +        log.debug("Removing scan refs from metadata " + extent + " " + filesToDelete);
 +        MetadataTableUtil.removeScanFiles(extent, filesToDelete, SystemCredentials.get(), tabletServer.getLock());
 +      }
 +    }
 +
 +    // Waits up to maxWaitTime for scans referencing pathsToWaitFor to release them, and
 +    // returns the subset still in use when the wait ends. If blockNewScans is true, new scan
 +    // reservations are blocked for the duration (and unblocked before returning).
 +    private TreeSet<FileRef> waitForScansToFinish(Set<FileRef> pathsToWaitFor, boolean blockNewScans, long maxWaitTime) {
 +      long startTime = System.currentTimeMillis();
 +      TreeSet<FileRef> inUse = new TreeSet<FileRef>();
 +
 +      Span waitForScans = Trace.start("waitForScans");
 +      try {
 +        synchronized (Tablet.this) {
 +          if (blockNewScans) {
 +            if (reservationsBlocked)
 +              throw new IllegalStateException();
 +
 +            reservationsBlocked = true;
 +          }
 +
 +          for (FileRef path : pathsToWaitFor) {
 +            while (fileScanReferenceCounts.get(path) > 0 && System.currentTimeMillis() - startTime < maxWaitTime) {
 +              try {
 +                Tablet.this.wait(100);
 +              } catch (InterruptedException e) {
 +                // NOTE(review): interrupt is logged but not re-asserted on the thread; the
 +                // timed wait loop bounds how long this can spin - confirm this is intended
 +                log.warn(e, e);
 +              }
 +            }
 +          }
 +
 +          // anything still referenced after the timeout is reported back to the caller
 +          for (FileRef path : pathsToWaitFor) {
 +            if (fileScanReferenceCounts.get(path) > 0)
 +              inUse.add(path);
 +          }
 +
 +          if (blockNewScans) {
 +            reservationsBlocked = false;
 +            Tablet.this.notifyAll();
 +          }
 +
 +        }
 +      } finally {
 +        waitForScans.stop();
 +      }
 +      return inUse;
 +    }
 +
 +    // Imports bulk-loaded files into this tablet: validates that all files live in one bulk
 +    // dir under a table dir, skips files already imported for this transaction id (tid),
 +    // optionally stamps logical times, records the files in the metadata table, and finally
 +    // adds them to the in-memory file set. Not supported for the root tablet.
 +    public void importMapFiles(long tid, Map<FileRef,DataFileValue> pathsString, boolean setTime) throws IOException {
 +
 +      String bulkDir = null;
 +
 +      Map<FileRef,DataFileValue> paths = new HashMap<FileRef,DataFileValue>();
 +      for (Entry<FileRef,DataFileValue> entry : pathsString.entrySet())
 +        paths.put(entry.getKey(), entry.getValue());
 +
 +      for (FileRef tpath : paths.keySet()) {
 +
 +        // every file must be two levels below a configured tables dir (tablesDir/tableId/bulkDir/file)
 +        boolean inTheRightDirectory = false;
 +        Path parent = tpath.path().getParent().getParent();
 +        for (String tablesDir : ServerConstants.getTablesDirs()) {
 +          if (parent.equals(new Path(tablesDir, extent.getTableId().toString()))) {
 +            inTheRightDirectory = true;
 +            break;
 +          }
 +        }
 +        if (!inTheRightDirectory) {
 +          throw new IOException("Data file " + tpath + " not in table dirs");
 +        }
 +
 +        if (bulkDir == null)
 +          bulkDir = tpath.path().getParent().toString();
 +        else if (!bulkDir.equals(tpath.path().getParent().toString()))
 +          throw new IllegalArgumentException("bulk files in different dirs " + bulkDir + " " + tpath);
 +
 +      }
 +
 +      if (extent.isRootTablet()) {
 +        throw new IllegalArgumentException("Can not import files to root tablet");
 +      }
 +
 +      synchronized (bulkFileImportLock) {
 +        Credentials creds = SystemCredentials.get();
 +        Connector conn;
 +        try {
 +          conn = HdfsZooInstance.getInstance().getConnector(creds.getPrincipal(), creds.getToken());
 +        } catch (Exception ex) {
 +          throw new IOException(ex);
 +        }
 +        // Remove any bulk files we've previously loaded and compacted away
 +        List<FileRef> files = MetadataTableUtil.getBulkFilesLoaded(conn, extent, tid);
 +
 +        for (FileRef file : files)
 +          if (paths.keySet().remove(file))
 +            log.debug("Ignoring request to re-import a file already imported: " + extent + ": " + file);
 +
 +        if (paths.size() > 0) {
 +          long bulkTime = Long.MIN_VALUE;
 +          if (setTime) {
 +            // assign each file a monotonically increasing logical time
 +            for (DataFileValue dfv : paths.values()) {
 +              long nextTime = tabletTime.getAndUpdateTime();
 +              if (nextTime < bulkTime)
 +                throw new IllegalStateException("Time went backwards unexpectedly " + nextTime + " " + bulkTime);
 +              bulkTime = nextTime;
 +              dfv.setTime(bulkTime);
 +            }
 +          }
 +
 +          // persist the new time and file entries atomically w.r.t. other time updates
 +          synchronized (timeLock) {
 +            if (bulkTime > persistedTime)
 +              persistedTime = bulkTime;
 +
 +            MetadataTableUtil.updateTabletDataFile(tid, extent, paths, tabletTime.getMetadataValue(persistedTime), creds, tabletServer.getLock());
 +          }
 +        }
 +      }
 +
 +      // update in-memory state only after the metadata table write succeeded
 +      synchronized (Tablet.this) {
 +        for (Entry<FileRef,DataFileValue> tpath : paths.entrySet()) {
 +          if (datafileSizes.containsKey(tpath.getKey())) {
 +            log.error("Adding file that is already in set " + tpath.getKey());
 +          }
 +          datafileSizes.put(tpath.getKey(), tpath.getValue());
 +
 +        }
 +
 +        tabletResources.importedMapFiles();
 +
 +        computeNumEntries();
 +      }
 +
 +      for (Entry<FileRef,DataFileValue> entry : paths.entrySet()) {
 +        log.log(TLevel.TABLET_HIST, extent + " import " + entry.getKey() + " " + entry.getValue());
 +      }
 +    }
 +
 +    // If the tablet is at or over its max file count, reserves the smallest data file that is
 +    // not being major compacted so the next minor compaction can merge into it. Returns null
 +    // when no merge is needed or possible (root tablet, under the limit, or no candidate).
 +    FileRef reserveMergingMinorCompactionFile() {
 +      if (mergingMinorCompactionFile != null)
 +        throw new IllegalStateException("Tried to reserve merging minor compaction file when already reserved  : " + mergingMinorCompactionFile);
 +
 +      if (extent.isRootTablet())
 +        return null;
 +
 +      int maxFiles = acuTableConf.getMaxFilesPerTablet();
 +
 +      // when a major compaction is running and we are at max files, write out
 +      // one extra file... want to avoid the case where major compaction is
 +      // compacting everything except for the largest file, and therefore the
 +      // largest file is returned for merging.. the following check mostly
 +      // avoids this case, except for the case where major compactions fail or
 +      // are canceled
 +      if (majorCompactingFiles.size() > 0 && datafileSizes.size() == maxFiles)
 +        return null;
 +
 +      if (datafileSizes.size() >= maxFiles) {
 +        // find the smallest file
 +
 +        long min = Long.MAX_VALUE;
 +        FileRef minName = null;
 +
 +        for (Entry<FileRef,DataFileValue> entry : datafileSizes.entrySet()) {
 +          if (entry.getValue().getSize() < min && !majorCompactingFiles.contains(entry.getKey())) {
 +            min = entry.getValue().getSize();
 +            minName = entry.getKey();
 +          }
 +        }
 +
 +        if (minName == null)
 +          return null;
 +
 +        mergingMinorCompactionFile = minName;
 +        return minName;
 +      }
 +
 +      return null;
 +    }
 +
 +    // Releases the merging minor compaction reservation. The argument must match the
 +    // currently reserved file exactly (both null, or both the same file).
 +    void unreserveMergingMinorCompactionFile(FileRef file) {
 +      if ((file == null && mergingMinorCompactionFile != null) || (file != null && mergingMinorCompactionFile == null)
 +          || (file != null && mergingMinorCompactionFile != null && !file.equals(mergingMinorCompactionFile)))
 +        throw new IllegalStateException("Disagreement " + file + " " + mergingMinorCompactionFile);
 +
 +      mergingMinorCompactionFile = null;
 +    }
 +
 +    // Finalizes a minor compaction: renames the tmp file into place, writes the new file (and
 +    // any merged-away file) to the metadata table, records the minc finish event in the WAL,
 +    // and then updates in-memory state. The ordering of these steps is load-bearing for
 +    // crash recovery - see the inline comments before changing it.
 +    void bringMinorCompactionOnline(FileRef tmpDatafile, FileRef newDatafile, FileRef absMergeFile, DataFileValue dfv, CommitSession commitSession, long flushId)
 +        throws IOException {
 +
 +      IZooReaderWriter zoo = ZooReaderWriter.getRetryingInstance();
 +      if (extent.isRootTablet()) {
 +        try {
 +          if (!zoo.isLockHeld(tabletServer.getLock().getLockID())) {
 +            throw new IllegalStateException();
 +          }
 +        } catch (Exception e) {
 +          throw new IllegalStateException("Can not bring major compaction online, lock not held", e);
 +        }
 +      }
 +
 +      // rename before putting in metadata table, so files in metadata table should
 +      // always exist
 +      do {
 +        try {
 +          if (dfv.getNumEntries() == 0) {
 +            // nothing was written; discard the empty tmp file instead of renaming it
 +            fs.deleteRecursively(tmpDatafile.path());
 +          } else {
 +            if (fs.exists(newDatafile.path())) {
 +              log.warn("Target map file already exist " + newDatafile);
 +              fs.deleteRecursively(newDatafile.path());
 +            }
 +            
 +            rename(fs, tmpDatafile.path(), newDatafile.path());
 +          }
 +          break;
 +        } catch (IOException ioe) {
 +          log.warn("Tablet " + extent + " failed to rename " + newDatafile + " after MinC, will retry in 60 secs...", ioe);
 +          UtilWaitThread.sleep(60 * 1000);
 +        }
 +      } while (true);
 +
 +      long t1, t2;
 +
 +      // the code below always assumes merged files are in use by scans... this must be done
 +      // because the in memory list of files is not updated until after the metadata table
 +      // therefore the file is available to scans until memory is updated, but want to ensure
 +      // the file is not available for garbage collection... if memory were updated
 +      // before this point (like major compactions do), then the following code could wait
 +      // for scans to finish like major compactions do.... used to wait for scans to finish
 +      // here, but that was incorrect because a scan could start after waiting but before
 +      // memory was updated... assuming the file is always in use by scans leads to
 +      // one uneeded metadata update when it was not actually in use
 +      Set<FileRef> filesInUseByScans = Collections.emptySet();
 +      if (absMergeFile != null)
 +        filesInUseByScans = Collections.singleton(absMergeFile);
 +
 +      // very important to write delete entries outside of log lock, because
 +      // this metadata write does not go up... it goes sideways or to itself
 +      if (absMergeFile != null)
 +        MetadataTableUtil.addDeleteEntries(extent, Collections.singleton(absMergeFile), SystemCredentials.get());
 +
 +      Set<String> unusedWalLogs = beginClearingUnusedLogs();
 +      try {
 +        // the order of writing to metadata and walog is important in the face of machine/process failures
 +        // need to write to metadata before writing to walog, when things are done in the reverse order
 +        // data could be lost... the minor compaction start even should be written before the following metadata
 +        // write is made
 +
 +        synchronized (timeLock) {
 +          if (commitSession.getMaxCommittedTime() > persistedTime)
 +            persistedTime = commitSession.getMaxCommittedTime();
 +
 +          String time = tabletTime.getMetadataValue(persistedTime);
 +          MasterMetadataUtil.updateTabletDataFile(extent, newDatafile, absMergeFile, dfv, time, SystemCredentials.get(), filesInUseByScans,
 +              tabletServer.getClientAddressString(), tabletServer.getLock(), unusedWalLogs, lastLocation, flushId);
 +        }
 +
 +      } finally {
 +        finishClearingUnusedLogs();
 +      }
 +
 +      do {
 +        try {
 +          // the purpose of making this update use the new commit session, instead of the old one passed in,
 +          // is because the new one will reference the logs used by current memory...
 +
 +          tabletServer.minorCompactionFinished(tabletMemory.getCommitSession(), newDatafile.toString(), commitSession.getWALogSeq() + 2);
 +          break;
 +        } catch (IOException e) {
 +          log.error("Failed to write to write-ahead log " + e.getMessage() + " will retry", e);
 +          UtilWaitThread.sleep(1 * 1000);
 +        }
 +      } while (true);
 +
 +      // metadata and WAL are durable; now update the in-memory view under the tablet lock
 +      synchronized (Tablet.this) {
 +        lastLocation = null;
 +
 +        t1 = System.currentTimeMillis();
 +        if (datafileSizes.containsKey(newDatafile)) {
 +          log.error("Adding file that is already in set " + newDatafile);
 +        }
 +
 +        if (dfv.getNumEntries() > 0) {
 +          datafileSizes.put(newDatafile, dfv);
 +        }
 +
 +        if (absMergeFile != null) {
 +          datafileSizes.remove(absMergeFile);
 +        }
 +
 +        unreserveMergingMinorCompactionFile(absMergeFile);
 +
 +        dataSourceDeletions.incrementAndGet();
 +        tabletMemory.finishedMinC();
 +
 +        lastFlushID = flushId;
 +
 +        computeNumEntries();
 +        t2 = System.currentTimeMillis();
 +      }
 +
 +      // must do this after list of files in memory is updated above
 +      removeFilesAfterScan(filesInUseByScans);
 +
 +      if (absMergeFile != null)
 +        log.log(TLevel.TABLET_HIST, extent + " MinC [" + absMergeFile + ",memory] -> " + newDatafile);
 +      else
 +        log.log(TLevel.TABLET_HIST, extent + " MinC [memory] -> " + newDatafile);
 +      log.debug(String.format("MinC finish lock %.2f secs %s", (t2 - t1) / 1000.0, getExtent().toString()));
 +      if (dfv.getSize() > acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD)) {
 +        log.debug(String.format("Minor Compaction wrote out file larger than split threshold.  split threshold = %,d  file size = %,d",
 +            acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD), dfv.getSize()));
 +      }
 +
 +    }
 +
 +    // Marks the given files as inputs to the starting major compaction. Fails if a major
 +    // compaction is already in progress or a file is reserved by a merging minor compaction.
 +    public void reserveMajorCompactingFiles(Collection<FileRef> files) {
 +      if (majorCompactingFiles.size() != 0)
 +        throw new IllegalStateException("Major compacting files not empty " + majorCompactingFiles);
 +
 +      if (mergingMinorCompactionFile != null && files.contains(mergingMinorCompactionFile))
 +        throw new IllegalStateException("Major compaction tried to resrve file in use by minor compaction " + mergingMinorCompactionFile);
 +
 +      majorCompactingFiles.addAll(files);
 +    }
 +
 +    // Releases all major compaction file reservations.
 +    public void clearMajorCompactingFile() {
 +      majorCompactingFiles.clear();
 +    }
 +
 +    // Finalizes a major compaction: renames the tmp file into place (non-root tablets),
 +    // atomically swaps old files for the new one in memory, then replaces the file entries in
 +    // the metadata table. The root tablet takes a ZooKeeper-lock-guarded path instead.
 +    void bringMajorCompactionOnline(Set<FileRef> oldDatafiles, FileRef tmpDatafile, FileRef newDatafile, Long compactionId, DataFileValue dfv)
 +        throws IOException {
 +      long t1, t2;
 +
 +      if (!extent.isRootTablet()) {
 +
 +        if (fs.exists(newDatafile.path())) {
 +          log.error("Target map file already exist " + newDatafile, new Exception());
 +          throw new IllegalStateException("Target map file already exist " + newDatafile);
 +        }
 +
 +        // rename before putting in metadata table, so files in metadata table should
 +        // always exist
 +        rename(fs, tmpDatafile.path(), newDatafile.path());
 +        
 +        // an empty compaction output is renamed then deleted so no dangling tmp file remains
 +        if (dfv.getNumEntries() == 0) {
 +          fs.deleteRecursively(newDatafile.path());
 +        }
 +      }
 +
 +      TServerInstance lastLocation = null;
 +      synchronized (Tablet.this) {
 +
 +        t1 = System.currentTimeMillis();
 +
 +        IZooReaderWriter zoo = ZooReaderWriter.getRetryingInstance();
 +
 +        dataSourceDeletions.incrementAndGet();
 +
 +        if (extent.isRootTablet()) {
 +
 +          // root tablet has no metadata scan refs, so wait (unboundedly) for scans to release
 +          waitForScansToFinish(oldDatafiles, true, Long.MAX_VALUE);
 +
 +          try {
 +            if (!zoo.isLockHeld(tabletServer.getLock().getLockID())) {
 +              throw new IllegalStateException();
 +            }
 +          } catch (Exception e) {
 +            throw new IllegalStateException("Can not bring major compaction online, lock not held", e);
 +          }
 +
 +          // mark files as ready for deletion, but
 +          // do not delete them until we successfully
 +          // rename the compacted map file, in case
 +          // the system goes down
 +
 +          RootFiles.replaceFiles(acuTableConf, fs, location, oldDatafiles, tmpDatafile, newDatafile);
 +        }
 +
 +        // atomically remove old files and add new file
 +        for (FileRef oldDatafile : oldDatafiles) {
 +          if (!datafileSizes.containsKey(oldDatafile)) {
 +            log.error("file does not exist in set " + oldDatafile);
 +          }
 +          datafileSizes.remove(oldDatafile);
 +          majorCompactingFiles.remove(oldDatafile);
 +        }
 +
 +        if (datafileSizes.containsKey(newDatafile)) {
 +          log.error("Adding file that is already in set " + newDatafile);
 +        }
 +
 +        if (dfv.getNumEntries() > 0) {
 +          datafileSizes.put(newDatafile, dfv);
 +        }
 +
 +        // could be used by a follow on compaction in a multipass compaction
 +        majorCompactingFiles.add(newDatafile);
 +
 +        computeNumEntries();
 +
 +        lastLocation = Tablet.this.lastLocation;
 +        Tablet.this.lastLocation = null;
 +
 +        if (compactionId != null)
 +          lastCompactID = compactionId;
 +
 +        t2 = System.currentTimeMillis();
 +      }
 +
 +      if (!extent.isRootTablet()) {
 +        // give scans a bounded window to release old files before recording scan refs
 +        Set<FileRef> filesInUseByScans = waitForScansToFinish(oldDatafiles, false, 10000);
 +        if (filesInUseByScans.size() > 0)
 +          log.debug("Adding scan refs to metadata " + extent + " " + filesInUseByScans);
 +        MasterMetadataUtil.replaceDatafiles(extent, oldDatafiles, filesInUseByScans, newDatafile, compactionId, dfv, SystemCredentials.get(),
 +            tabletServer.getClientAddressString(), lastLocation, tabletServer.getLock());
 +        removeFilesAfterScan(filesInUseByScans);
 +      }
 +
 +      log.debug(String.format("MajC finish lock %.2f secs", (t2 - t1) / 1000.0));
 +      log.log(TLevel.TABLET_HIST, extent + " MajC " + oldDatafiles + " --> " + newDatafile);
 +    }
 +
 +    // Returns an unmodifiable snapshot of the current file -> size map.
 +    public SortedMap<FileRef,DataFileValue> getDatafileSizes() {
 +      synchronized (Tablet.this) {
 +        TreeMap<FileRef,DataFileValue> copy = new TreeMap<FileRef,DataFileValue>(datafileSizes);
 +        return Collections.unmodifiableSortedMap(copy);
 +      }
 +    }
 +
 +    // Returns an unmodifiable snapshot of the current set of data files.
 +    public Set<FileRef> getFiles() {
 +      synchronized (Tablet.this) {
 +        HashSet<FileRef> files = new HashSet<FileRef>(datafileSizes.keySet());
 +        return Collections.unmodifiableSet(files);
 +      }
 +    }
 +
 +  }
 +
 +  // Standard constructor used when loading an existing tablet from metadata key/values.
 +  public Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm, SortedMap<Key,Value> tabletsKeyValues)
 +      throws IOException {
 +    this(tabletServer, location, extent, trm, CachedConfiguration.getInstance(), tabletsKeyValues);
 +    splitCreationTime = 0;
 +  }
 +
 +  // Constructor used for a tablet created by a split; file info comes from SplitInfo.
 +  public Tablet(KeyExtent extent, TabletServer tabletServer, TabletResourceManager trm, SplitInfo info) throws IOException {
 +    this(tabletServer, new Text(info.dir), extent, trm, CachedConfiguration.getInstance(), info.datafiles, info.time, info.initFlushID, info.initCompactID, info.lastLocation);
 +    splitCreationTime = System.currentTimeMillis();
 +  }
 +
 +  /**
 +   * Only visible for testing. Sets just the final fields; most other state is left
 +   * uninitialized, so instances built this way are not fully functional tablets.
 +   */
 +  @VisibleForTesting
 +  protected Tablet(TabletTime tabletTime, String tabletDirectory, int logId, Path location, DatafileManager datafileManager) {
 +    this.tabletTime = tabletTime;
 +    this.tabletDirectory = tabletDirectory;
 +    this.logId = logId;
 +    this.location = location;
 +    this.datafileManager = datafileManager; 
 +  }
 +
 +  // Delegating constructor that supplies the default VolumeManager.
 +  private Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm, Configuration conf,
 +      SortedMap<Key,Value> tabletsKeyValues) throws IOException {
 +    this(tabletServer, location, extent, trm, conf, VolumeManagerImpl.get(), tabletsKeyValues);
 +  }
 +
 +  // Shared empty log-entry list used by the split-time constructor below.
 +  static private final List<LogEntry> EMPTY = Collections.emptyList();
 +
 +  // Split-time constructor: datafiles are already known, so no log entries are looked up.
 +  private Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm, Configuration conf,
 +      SortedMap<FileRef,DataFileValue> datafiles, String time, long initFlushID, long initCompactID, TServerInstance last) throws IOException {
 +    this(tabletServer, location, extent, trm, conf, VolumeManagerImpl.get(), EMPTY, datafiles, time, last, new HashSet<FileRef>(), initFlushID, initCompactID);
 +  }
 +
 +  // Extracts the tablet's time entry from the given metadata key/values. Returns null for
 +  // the root tablet (which has no metadata row) or when exactly one entry is not found.
 +  private static String lookupTime(AccumuloConfiguration conf, KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues) {
 +    SortedMap<Key,Value> entries;
 +
 +    if (extent.isRootTablet()) {
 +      return null;
 +    } else {
 +      entries = new TreeMap<Key,Value>();
 +      Text rowName = extent.getMetadataEntry();
 +      for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +        if (entry.getKey().compareRow(rowName) == 0 && TabletsSection.ServerColumnFamily.TIME_COLUMN.hasColumns(entry.getKey())) {
 +          entries.put(new Key(entry.getKey()), new Value(entry.getValue()));
 +        }
 +      }
 +    }
 +
 +    // log.debug("extent : "+extent+"   entries : "+entries);
 +
 +    if (entries.size() == 1)
 +      return entries.values().iterator().next().toString();
 +    return null;
 +  }
 +
 +  // Builds the tablet's file -> size map. The root tablet lists its directory directly
 +  // (there is no metadata row for it); all other tablets scan the metadata/root table for
 +  // their data file entries.
 +  private static SortedMap<FileRef,DataFileValue> lookupDatafiles(AccumuloConfiguration conf, VolumeManager fs, KeyExtent extent,
 +      SortedMap<Key,Value> tabletsKeyValues) throws IOException {
 +
 +    TreeMap<FileRef,DataFileValue> datafiles = new TreeMap<FileRef,DataFileValue>();
 +
 +    if (extent.isRootTablet()) { // the meta0 tablet
 +      Path location = new Path(MetadataTableUtil.getRootTabletDir());
 +
 +      // cleanUpFiles() has special handling for delete. files
 +      FileStatus[] files = fs.listStatus(location);
 +      Collection<String> goodPaths = RootFiles.cleanupReplacement(fs, files, true);
 +      for (String good : goodPaths) {
 +        Path path = new Path(good);
 +        String filename = path.getName();
 +        FileRef ref = new FileRef(location.toString() + "/" + filename, path);
 +        DataFileValue dfv = new DataFileValue(0, 0);
 +        datafiles.put(ref, dfv);
 +      }
 +    } else {
 +
 +      Text rowName = extent.getMetadataEntry();
 +
 +      // meta tablets read from the root table; user tablets read from the metadata table
 +      String tableId = extent.isMeta() ? RootTable.ID : MetadataTable.ID;
 +      ScannerImpl mdScanner = new ScannerImpl(HdfsZooInstance.getInstance(), SystemCredentials.get(), tableId, Authorizations.EMPTY);
 +
 +      // Commented out because when no data file is present, each tablet will scan through metadata table and return nothing
 +      // reduced batch size to improve performance
 +      // changed here after endKeys were implemented from 10 to 1000
 +      mdScanner.setBatchSize(1000);
 +
 +      // leave these in, again, now using endKey for safety
 +      mdScanner.fetchColumnFamily(DataFileColumnFamily.NAME);
 +
 +      mdScanner.setRange(new Range(rowName));
 +
 +      for (Entry<Key,Value> entry : mdScanner) {
 +
 +        if (entry.getKey().compareRow(rowName) != 0) {
 +          break;
 +        }
 +
 +        FileRef ref = new FileRef(fs, entry.getKey());
 +        datafiles.put(ref, new DataFileValue(entry.getValue().get()));
 +      }
 +    }
 +    return datafiles;
 +  }
 +
 +  // Collects the write-ahead log entries for a tablet. Meta tablets query the metadata
 +  // utility directly; other tablets filter the supplied metadata key/values by row and the
 +  // log column family.
 +  private static List<LogEntry> lookupLogEntries(KeyExtent ke, SortedMap<Key,Value> tabletsKeyValues) {
 +    List<LogEntry> logEntries = new ArrayList<LogEntry>();
 +
 +    if (ke.isMeta()) {
 +      try {
 +        logEntries = MetadataTableUtil.getLogEntries(SystemCredentials.get(), ke);
 +      } catch (Exception ex) {
 +        throw new RuntimeException("Unable to read tablet log entries", ex);
 +      }
 +    } else {
 +      log.debug("Looking at metadata " + tabletsKeyValues);
 +      Text row = ke.getMetadataEntry();
 +      for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +        Key key = entry.getKey();
 +        if (key.getRow().equals(row)) {
 +          if (key.getColumnFamily().equals(LogColumnFamily.NAME)) {
 +            logEntries.add(LogEntry.fromKeyValue(key, entry.getValue()));
 +          }
 +        }
 +      }
 +    }
 +
 +    log.debug("got " + logEntries + " for logs for " + ke);
 +    return logEntries;
 +  }
 +
 +  // Extracts the set of files referenced by active scans (scan column family entries) from
 +  // the given metadata key/values for this tablet's row.
 +  private static Set<FileRef> lookupScanFiles(KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues, VolumeManager fs) throws IOException {
 +    HashSet<FileRef> scanFiles = new HashSet<FileRef>();
 +
 +    Text row = extent.getMetadataEntry();
 +    for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +      Key key = entry.getKey();
 +      if (key.getRow().equals(row) && key.getColumnFamily().equals(ScanFileColumnFamily.NAME)) {
 +        scanFiles.add(new FileRef(fs, key));
 +      }
 +    }
 +
 +    return scanFiles;
 +  }
 +
 +  // Reads the tablet's persisted flush id from the metadata key/values; -1 if not present.
 +  private static long lookupFlushID(KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues) {
 +    Text row = extent.getMetadataEntry();
 +    for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +      Key key = entry.getKey();
 +      if (key.getRow().equals(row) && TabletsSection.ServerColumnFamily.FLUSH_COLUMN.equals(key.getColumnFamily(), key.getColumnQualifier()))
 +        return Long.parseLong(entry.getValue().toString());
 +    }
 +
 +    return -1;
 +  }
 +
 +  // Reads the tablet's persisted compact id from the metadata key/values; -1 if not present.
 +  private static long lookupCompactID(KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues) {
 +    Text row = extent.getMetadataEntry();
 +    for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +      Key key = entry.getKey();
 +      if (key.getRow().equals(row) && TabletsSection.ServerColumnFamily.COMPACT_COLUMN.equals(key.getColumnFamily(), key.getColumnQualifier()))
 +        return Long.parseLong(entry.getValue().toString());
 +    }
 +
 +    return -1;
 +  }
 +
 +  // Constructor that derives all tablet state (logs, files, time, last server, scan files,
 +  // flush/compact ids) from the supplied metadata key/values via the lookup helpers.
 +  private Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm, Configuration conf, VolumeManager fs,
 +      SortedMap<Key,Value> tabletsKeyValues) throws IOException {
 +    this(tabletServer, location, extent, trm, conf, fs, lookupLogEntries(extent, tabletsKeyValues), lookupDatafiles(tabletServer.getSystemConfiguration(), fs,
 +        extent, tabletsKeyValues), lookupTime(tabletServer.getSystemConfiguration(), extent, tabletsKeyValues), lookupLastServer(extent, tabletsKeyValues),
 +        lookupScanFiles(extent, tabletsKeyValues, fs), lookupFlushID(extent, tabletsKeyValues), lookupCompactID(extent, tabletsKeyValues));
 +  }
 +
 +  // Returns the first last-location entry found in the metadata key/values, or null.
 +  private static TServerInstance lookupLastServer(KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues) {
 +    for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +      if (entry.getKey().getColumnFamily().compareTo(TabletsSection.LastLocationColumnFamily.NAME) == 0) {
 +        return new TServerInstance(entry.getValue(), entry.getKey().getColumnQualifier());
 +      }
 +    }
 +    return null;
 +  }
 +
  +  /**
  +   * yet another constructor - this one allows us to avoid costly lookups into the Metadata table if we already know the files we need - as at split time.
  +   * Resolves volume changes, establishes tablet time and default visibility labels, replays any
  +   * write-ahead logs, and finally wires up the datafile manager.
  +   */
  +  private Tablet(final TabletServer tabletServer, final Text location, final KeyExtent extent, final TabletResourceManager trm, final Configuration conf,
  +      final VolumeManager fs, final List<LogEntry> rawLogEntries, final SortedMap<FileRef,DataFileValue> rawDatafiles, String time,
  +      final TServerInstance lastLocation, Set<FileRef> scanFiles, long initFlushID, long initCompactID) throws IOException {
  +
  +    // Re-map paths onto current volumes (handles replaced/retired volumes) before use.
  +    TabletFiles tabletPaths = VolumeUtil.updateTabletVolumes(tabletServer.getLock(), fs, extent, new TabletFiles(location.toString(), rawLogEntries,
  +        rawDatafiles));
  +
  +    Path locationPath;
  +
  +    // A dir containing ':' is already a fully-qualified URI; otherwise qualify it against the table volume.
  +    if (tabletPaths.dir.contains(":")) {
  +      locationPath = new Path(tabletPaths.dir.toString());
  +    } else {
  +      locationPath = fs.getFullPath(FileType.TABLE, extent.getTableId().toString() + tabletPaths.dir.toString());
  +    }
  +
  +    final List<LogEntry> logEntries = tabletPaths.logEntries;
  +    final SortedMap<FileRef,DataFileValue> datafiles = tabletPaths.datafiles;
  +
  +    this.location = locationPath;
  +    this.lastLocation = lastLocation;
  +    this.tabletDirectory = tabletPaths.dir;
  +    this.conf = conf;
  +    this.acuTableConf = tabletServer.getTableConfiguration(extent);
  +
  +    this.fs = fs;
  +    this.extent = extent;
  +    this.tabletResources = trm;
  +
  +    this.lastFlushID = initFlushID;
  +    this.lastCompactID = initCompactID;
  +
  +    // The root tablet derives its logical time from the max timestamp across its datafiles,
  +    // overriding any supplied time string.
  +    if (extent.isRootTablet()) {
  +      long rtime = Long.MIN_VALUE;
  +      for (FileRef ref : datafiles.keySet()) {
  +        Path path = ref.path();
  +        FileSystem ns = fs.getVolumeByPath(path).getFileSystem();
  +        FileSKVIterator reader = FileOperations.getInstance().openReader(path.toString(), true, ns, ns.getConf(), tabletServer.getTableConfiguration(extent));
  +        long maxTime = -1;
  +        try {
  +
  +          while (reader.hasTop()) {
  +            maxTime = Math.max(maxTime, reader.getTopKey().getTimestamp());
  +            reader.next();
  +          }
  +
  +        } finally {
  +          reader.close();
  +        }
  +
  +        if (maxTime > rtime) {
  +          time = TabletTime.LOGICAL_TIME_ID + "" + maxTime;
  +          rtime = maxTime;
  +        }
  +      }
  +    }
  +    if (time == null && datafiles.isEmpty() && extent.equals(RootTable.OLD_EXTENT)) {
  +      // recovery... old root tablet has no data, so time doesn't matter:
  +      time = TabletTime.LOGICAL_TIME_ID + "" + Long.MIN_VALUE;
  +    }
  +
  +    this.tabletServer = tabletServer;
  +    this.logId = tabletServer.createLogId(extent);
  +
  +    this.timer = new TabletStatsKeeper();
  +
  +    setupDefaultSecurityLabels(extent);
  +
  +    tabletMemory = new TabletMemory();
  +    tabletTime = TabletTime.getInstance(time);
  +    persistedTime = tabletTime.getTime();
  +
  +    // Keep constraints and default visibility labels in sync with live table config changes.
  +    acuTableConf.addObserver(configObserver = new ConfigurationObserver() {
  +
  +      private void reloadConstraints() {
  +        constraintChecker.set(new ConstraintChecker(acuTableConf));
  +      }
  +
  +      @Override
  +      public void propertiesChanged() {
  +        reloadConstraints();
  +
  +        try {
  +          setupDefaultSecurityLabels(extent);
  +        } catch (Exception e) {
  +          log.error("Failed to reload default security labels for extent: " + extent.toString());
  +        }
  +      }
  +
  +      @Override
  +      public void propertyChanged(String prop) {
  +        if (prop.startsWith(Property.TABLE_CONSTRAINT_PREFIX.getKey()))
  +          reloadConstraints();
  +        else if (prop.equals(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY.getKey())) {
  +          try {
  +            log.info("Default security labels changed for extent: " + extent.toString());
  +            setupDefaultSecurityLabels(extent);
  +          } catch (Exception e) {
  +            log.error("Failed to reload default security labels for extent: " + extent.toString());
  +          }
  +        }
  +
  +      }
  +
  +      @Override
  +      public void sessionExpired() {
  +        log.debug("Session expired, no longer updating per table props...");
  +      }
  +
  +    });
  +
  +    acuTableConf.getNamespaceConfiguration().addObserver(configObserver);
  +
  +    // Force a load of any per-table properties
  +    configObserver.propertiesChanged();
  +
  +    tabletResources.setTablet(this, acuTableConf);
  +    // Replay write-ahead logs into tablet memory, tracking the max system-set timestamp so
  +    // tablet time can be advanced past everything recovered.
  +    if (!logEntries.isEmpty()) {
  +      log.info("Starting Write-Ahead Log recovery for " + this.extent);
  +      // count[0] = entries used on tablet
  +      // count[1] = track max time from walog entries wihtout timestamps
  +      final long[] count = new long[2];
  +      final CommitSession commitSession = tabletMemory.getCommitSession();
  +      count[1] = Long.MIN_VALUE;
  +      try {
  +        Set<String> absPaths = new HashSet<String>();
  +        for (FileRef ref : datafiles.keySet())
  +          absPaths.add(ref.path().toString());
  +
  +        tabletServer.recover(this.tabletServer.getFileSystem(), this, logEntries, absPaths, new MutationReceiver() {
  +          @Override
  +          public void receive(Mutation m) {
  +            // LogReader.printMutation(m);
  +            Collection<ColumnUpdate> muts = m.getUpdates();
  +            for (ColumnUpdate columnUpdate : muts) {
  +              if (!columnUpdate.hasTimestamp()) {
  +                // if it is not a user set timestamp, it must have been set
  +                // by the system
  +                count[1] = Math.max(count[1], columnUpdate.getTimestamp());
  +              }
  +            }
  +            tabletMemory.mutate(commitSession, Collections.singletonList(m));
  +            count[0]++;
  +          }
  +        });
  +
  +        if (count[1] != Long.MIN_VALUE) {
  +          tabletTime.useMaxTimeFromWALog(count[1]);
  +        }
  +        commitSession.updateMaxCommittedTime(tabletTime.getTime());
  +
  +        if (count[0] == 0) {
  +          log.debug("No replayed mutations applied, removing unused entries for " + extent);
  +          MetadataTableUtil.removeUnusedWALEntries(extent, logEntries, tabletServer.getLock());
  +          logEntries.clear();
  +        }
  +
  +      } catch (Throwable t) {
  +        if (acuTableConf.getBoolean(Property.TABLE_FAILURES_IGNORE)) {
  +          log.warn("Error recovering from log files: ", t);
  +        } else {
  +          throw new RuntimeException(t);
  +        }
  +      }
  +      // make some closed references that represent the recovered logs
  +      currentLogs = new HashSet<DfsLogger>();
  +      for (LogEntry logEntry : logEntries) {
  +        for (String log : logEntry.logSet) {
  +          currentLogs.add(new DfsLogger(tabletServer.getServerConfig(), log, logEntry.getColumnQualifier().toString()));
  +        }
  +      }
  +
  +      log.info("Write-Ahead Log recovery complete for " + this.extent + " (" + count[0] + " mutations applied, " + tabletMemory.getNumEntries()
  +          + " entries created)");
  +    }
  +
  +    String contextName = acuTableConf.get(Property.TABLE_CLASSPATH);
  +    if (contextName != null && !contextName.equals("")) {
  +      // initialize context classloader, instead of possibly waiting for it to initialize for a scan
  +      // TODO this could hang, causing other tablets to fail to load - ACCUMULO-1292
  +      AccumuloVFSClassLoader.getContextManager().getClassLoader(contextName);
  +    }
  +
  +    // do this last after tablet is completely setup because it
  +    // could cause major compaction to start
  +    datafileManager = new DatafileManager(datafiles);
  +
  +    computeNumEntries();
  +
  +    datafileManager.removeFilesAfterScan(scanFiles);
  +
  +    // look for hints of a failure on the previous tablet server
  +    if (!logEntries.isEmpty() || needsMajorCompaction(MajorCompactionReason.NORMAL)) {
  +      // look for any temp files hanging around
  +      removeOldTemporaryFiles();
  +    }
  +
  +    log.log(TLevel.TABLET_HIST, extent + " opened");
  +  }
 +
 +  private void removeOldTemporaryFiles() {
 +    // remove any temporary files created by a previous tablet server
 +    try {
 +      for (FileStatus tmp : fs.globStatus(new Path(location, "*_tmp"))) {
 +        try {
 +          log.debug("Removing old temp file " + tmp.getPath());
 +          fs.delete(tmp.getPath());
 +        } catch (IOException ex) {
 +          log.error("Unable to remove old temp file " + tmp.getPath() + ": " + ex);
 +        }
 +      }
 +    } catch (IOException ex) {
 +      log.error("Error scanning for old temp files in " + location);
 +    }
 +  }
 +
 +  private void setupDefaultSecurityLabels(KeyExtent extent) {
 +    if (extent.isMeta()) {
 +      defaultSecurityLabel = new byte[0];
 +    } else {
 +      try {
 +        ColumnVisibility cv = new ColumnVisibility(acuTableConf.get(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY));
 +        this.defaultSecurityLabel = cv.getExpression();
 +      } catch (Exception e) {
 +        log.error(e, e);
 +        this.defaultSecurityLabel = new byte[0];
 +      }
 +    }
 +  }
 +
  +  /** A KeyValue that owns deep copies of its key and value bytes. */
  +  public static class KVEntry extends KeyValue {
  +    public KVEntry(Key k, Value v) {
  +      super(new Key(k), Arrays.copyOf(v.get(), v.get().length));
  +    }
  +
  +    @Override
  +    public String toString() {
  +      return key.toString() + "=" + getValue();
  +    }
  +
  +    // total bytes of key data plus value data
  +    int numBytes() {
  +      return key.getSize() + getValue().get().length;
  +    }
  +
  +    // numBytes plus a rough per-object allocation estimate
  +    int estimateMemoryUsed() {
  +      return key.getSize() + getValue().get().length + (9 * 32); // overhead is 32 per object
  +    }
  +  }
 +
  +  /**
  +   * Reads the given ranges through the supplied iterator, accumulating copies of matching
  +   * entries into results until all ranges are read, the memory budget is exceeded, or the
  +   * tablet closes. Any ranges (or remainders) not fully read are reported as unfinished so the
  +   * client can retry them.
  +   */
  +  private LookupResult lookup(SortedKeyValueIterator<Key,Value> mmfi, List<Range> ranges, HashSet<Column> columnSet, ArrayList<KVEntry> results,
  +      long maxResultsSize) throws IOException {
  +
  +    LookupResult lookupResult = new LookupResult();
  +
  +    boolean exceededMemoryUsage = false;
  +    boolean tabletClosed = false;
  +
  +    Set<ByteSequence> cfset = null;
  +    if (columnSet.size() > 0)
  +      cfset = LocalityGroupUtil.families(columnSet);
  +
  +    for (Range range : ranges) {
  +
  +      // once we stop (memory or close), remaining ranges are simply reported back unread
  +      if (exceededMemoryUsage || tabletClosed) {
  +        lookupResult.unfinishedRanges.add(range);
  +        continue;
  +      }
  +
  +      int entriesAdded = 0;
  +
  +      try {
  +        if (cfset != null)
  +          mmfi.seek(range, cfset, true);
  +        else
  +          mmfi.seek(range, LocalityGroupUtil.EMPTY_CF_SET, false);
  +
  +        while (mmfi.hasTop()) {
  +          Key key = mmfi.getTopKey();
  +
  +          KVEntry kve = new KVEntry(key, mmfi.getTopValue());
  +          results.add(kve);
  +          entriesAdded++;
  +          lookupResult.bytesAdded += kve.estimateMemoryUsed();
  +          lookupResult.dataSize += kve.numBytes();
  +
  +          exceededMemoryUsage = lookupResult.bytesAdded > maxResultsSize;
  +
  +          if (exceededMemoryUsage) {
  +            // record the rest of this range (exclusive of the key just returned)
  +            addUnfinishedRange(lookupResult, range, key, false);
  +            break;
  +          }
  +
  +          mmfi.next();
  +        }
  +
  +      } catch (TooManyFilesException tmfe) {
  +        // treat this as a closed tablet, and let the client retry
  +        log.warn("Tablet " + getExtent() + " has too many files, batch lookup can not run");
  +        handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
  +        tabletClosed = true;
  +      } catch (IOException ioe) {
  +        if (shutdownInProgress()) {
  +          // assume HDFS shutdown hook caused this exception
  +          log.debug("IOException while shutdown in progress ", ioe);
  +          handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
  +          tabletClosed = true;
  +        } else {
  +          throw ioe;
  +        }
  +      } catch (IterationInterruptedException iie) {
  +        // interrupts caused by tablet close are handled; anything else propagates
  +        if (isClosed()) {
  +          handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
  +          tabletClosed = true;
  +        } else {
  +          throw iie;
  +        }
  +      } catch (TabletClosedException tce) {
  +        handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
  +        tabletClosed = true;
  +      }
  +
  +    }
  +
  +    return lookupResult;
  +  }
 +
 +  private void handleTabletClosedDuringScan(ArrayList<KVEntry> results, LookupResult lookupResult, boolean exceededMemoryUsage, Range range, int entriesAdded) {
 +    if (exceededMemoryUsage)
 +      throw new IllegalStateException("tablet should not exceed memory usage or close, not both");
 +
 +    if (entriesAdded > 0)
 +      addUnfinishedRange(lookupResult, range, results.get(results.size() - 1).key, false);
 +    else
 +      lookupResult.unfinishedRanges.add(range);
 +
 +    lookupResult.closed = true;
 +  }
 +
 +  private void addUnfinishedRange(LookupResult lookupResult, Range range, Key key, boolean inclusiveStartKey) {
 +    if (range.getEndKey() == null || key.compareTo(range.getEndKey()) < 0) {
 +      Range nlur = new Range(new Key(key), inclusiveStartKey, range.getEndKey(), range.isEndKeyInclusive());
 +      lookupResult.unfinishedRanges.add(nlur);
 +    }
 +  }
 +
  +  /** Callback for streaming batches of matched key/value entries. */
  +  public static interface KVReceiver {
  +    void receive(List<KVEntry> matches) throws IOException;
  +  }
 +
  +  /** Outcome of a batch lookup: what was read, how big it was, and what remains. */
  +  class LookupResult {
  +    // ranges (or remainders of ranges) that were not fully read
  +    List<Range> unfinishedRanges = new ArrayList<Range>();
  +    // estimated memory consumed by results, compared against the max result size
  +    long bytesAdded = 0;
  +    // actual key/value bytes accumulated
  +    long dataSize = 0;
  +    // true when the tablet closed while the lookup was in progress
  +    boolean closed = false;
  +  }
 +
  +  /**
  +   * Performs a batch lookup of the given ranges against this tablet. Ranges are merged, sorted,
  +   * and validated to fall within the tablet before scanning. Query statistics are updated even
  +   * when an exception is thrown.
  +   */
  +  public LookupResult lookup(List<Range> ranges, HashSet<Column> columns, Authorizations authorizations, ArrayList<KVEntry> results, long maxResultSize,
  +      List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, AtomicBoolean interruptFlag) throws IOException {
  +
  +    if (ranges.size() == 0) {
  +      return new LookupResult();
  +    }
  +
  +    ranges = Range.mergeOverlapping(ranges);
  +    Collections.sort(ranges);
  +
  +    Range tabletRange = extent.toDataRange();
  +    for (Range range : ranges) {
  +      // do a test to see if this range falls within the tablet, if it does not
  +      // then clip will throw an exception
  +      tabletRange.clip(range);
  +    }
  +
  +    ScanDataSource dataSource = new ScanDataSource(authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag);
  +
  +    LookupResult result = null;
  +
  +    try {
  +      SortedKeyValueIterator<Key,Value> iter = new SourceSwitchingIterator(dataSource);
  +      result = lookup(iter, ranges, columns, results, maxResultSize);
  +      return result;
  +    } catch (IOException ioe) {
  +      // close with sawErrors=true so reserved files are not returned to the pool
  +      dataSource.close(true);
  +      throw ioe;
  +    } finally {
  +      // code in finally block because always want
  +      // to return mapfiles, even when exception is thrown
  +      dataSource.close(false);
  +
  +      synchronized (this) {
  +        queryCount += results.size();
  +        if (result != null)
  +          queryBytes += result.dataSize;
  +      }
  +    }
  +  }
 +
  +  /**
  +   * Reads up to {@code num} entries (or the table's scan memory budget, whichever is hit first)
  +   * from the iterator over the given range. The returned Batch carries a continue key when the
  +   * scan stopped early, a null continue key when the range was exhausted, and null results when
  +   * the range was exhausted with nothing read.
  +   */
  +  private Batch nextBatch(SortedKeyValueIterator<Key,Value> iter, Range range, int num, Set<Column> columns) throws IOException {
  +
  +    // log.info("In nextBatch..");
  +
  +    List<KVEntry> results = new ArrayList<KVEntry>();
  +    Key key = null;
  +
  +    Value value;
  +    long resultSize = 0L;
  +    long resultBytes = 0L;
  +
  +    long maxResultsSize = acuTableConf.getMemoryInBytes(Property.TABLE_SCAN_MAXMEM);
  +
  +    if (columns.size() == 0) {
  +      iter.seek(range, LocalityGroupUtil.EMPTY_CF_SET, false);
  +    } else {
  +      iter.seek(range, LocalityGroupUtil.families(columns), true);
  +    }
  +
  +    Key continueKey = null;
  +    boolean skipContinueKey = false;
  +
  +    boolean endOfTabletReached = false;
  +    while (iter.hasTop()) {
  +
  +      value = iter.getTopValue();
  +      key = iter.getTopKey();
  +
  +      KVEntry kvEntry = new KVEntry(key, value); // copies key and value
  +      results.add(kvEntry);
  +      resultSize += kvEntry.estimateMemoryUsed();
  +      resultBytes += kvEntry.numBytes();
  +
  +      // stop early when either limit is hit; the last key returned becomes the (skipped)
  +      // continue point for the next batch
  +      if (resultSize >= maxResultsSize || results.size() >= num) {
  +        continueKey = new Key(key);
  +        skipContinueKey = true;
  +        break;
  +      }
  +
  +      iter.next();
  +    }
  +
  +    if (iter.hasTop() == false) {
  +      endOfTabletReached = true;
  +    }
  +
  +    Batch retBatch = new Batch();
  +    retBatch.numBytes = resultBytes;
  +
  +    if (!endOfTabletReached) {
  +      retBatch.continueKey = continueKey;
  +      retBatch.skipContinueKey = skipContinueKey;
  +    } else {
  +      retBatch.continueKey = null;
  +    }
  +
  +    if (endOfTabletReached && results.size() == 0)
  +      retBatch.results = null;
  +    else
  +      retBatch.results = results;
  +
  +    return retBatch;
  +  }
 +
  +  /**
  +   * Determine if a JVM shutdown is in progress.
  +   *
  +   * Probes by attempting to remove a never-registered shutdown hook: once the JVM has begun
  +   * shutting down, removeShutdownHook throws IllegalStateException; otherwise it is a no-op.
  +   */
  +  private boolean shutdownInProgress() {
  +    try {
  +      Runtime.getRuntime().removeShutdownHook(new Thread(new Runnable() {
  +        @Override
  +        public void run() {}
  +      }));
  +    } catch (IllegalStateException ise) {
  +      return true;
  +    }
  +
  +    return false;
  +  }
 +
  +  /** Result of one nextBatch() call: entries read plus the position to resume from. */
  +  private class Batch {
  +    // whether the continue key itself was already returned and should be skipped
  +    public boolean skipContinueKey;
  +    // entries read; null signals the range was exhausted with nothing read
  +    public List<KVEntry> results;
  +    // key to resume from; null when the range was exhausted
  +    public Key continueKey;
  +    // total key/value bytes in results
  +    public long numBytes;
  +  }
 +
 +  Scanner createScanner(Range range, int num, Set<Column> columns, Authorizations authorizations, List<IterInfo> ssiList, Map<String,Map<String,String>> ssio,
 +      boolean isolated, AtomicBoolean interruptFlag) {
 +    // do a test to see if this range falls within the tablet, if it does not
 +    // then clip will throw an exception
 +    extent.toDataRange().clip(range);
 +
 +    ScanOptions opts = new ScanOptions(num, authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag, isolated);
 +    return new Scanner(range, opts);
 +  }
 +
  +  /** One batch of scan results plus whether more batches remain. */
  +  class ScanBatch {
  +    boolean more;
  +    List<KVEntry> results;
  +
  +    ScanBatch(List<KVEntry> results, boolean more) {
  +      this.results = results;
  +      this.more = more;
  +    }
  +  }
 +
  +  /**
  +   * Stateful cursor over a range of this tablet. read() returns successive batches, advancing
  +   * the range past each batch's continue key. For isolated scans the same data source and
  +   * iterator are reused across read() calls so a consistent snapshot is observed.
  +   */
  +  class Scanner {
  +
  +    private ScanOptions options;
  +    private Range range;
  +    // iterator/data source retained across reads only for isolated scans
  +    private SortedKeyValueIterator<Key,Value> isolatedIter;
  +    private ScanDataSource isolatedDataSource;
  +    private boolean sawException = false;
  +    private boolean scanClosed = false;
  +
  +    Scanner(Range range, ScanOptions options) {
  +      this.range = range;
  +      this.options = options;
  +    }
  +
  +    synchronized ScanBatch read() throws IOException, TabletClosedException {
  +
  +      if (sawException)
  +        throw new IllegalStateException("Tried to use scanner after exception occurred.");
  +
  +      if (scanClosed)
  +        throw new IllegalStateException("Tried to use scanner after it was closed.");
  +
  +      Batch results = null;
  +
  +      ScanDataSource dataSource;
  +
  +      if (options.isolated) {
  +        if (isolatedDataSource == null)
  +          isolatedDataSource = new ScanDataSource(options);
  +        dataSource = isolatedDataSource;
  +      } else {
  +        dataSource = new ScanDataSource(options);
  +      }
  +
  +      try {
  +
  +        SortedKeyValueIterator<Key,Value> iter;
  +
  +        if (options.isolated) {
  +          if (isolatedIter == null)
  +            isolatedIter = new SourceSwitchingIterator(dataSource, true);
  +          else
  +            isolatedDataSource.fileManager.reattach();
  +          iter = isolatedIter;
  +        } else {
  +          iter = new SourceSwitchingIterator(dataSource, false);
  +        }
  +
  +        results = nextBatch(iter, range, options.num, options.columnSet);
  +
  +        if (results.results == null) {
  +          range = null;
  +          return new ScanBatch(new ArrayList<Tablet.KVEntry>(), false);
  +        } else if (results.continueKey == null) {
  +          return new ScanBatch(results.results, false);
  +        } else {
  +          // advance past what was returned so the next read resumes correctly
  +          range = new Range(results.continueKey, !results.skipContinueKey, range.getEndKey(), range.isEndKeyInclusive());
  +          return new ScanBatch(results.results, true);
  +        }
  +
  +      } catch (IterationInterruptedException iie) {
  +        sawException = true;
  +        if (isClosed())
  +          throw new TabletClosedException(iie);
  +        else
  +          throw iie;
  +      } catch (IOException ioe) {
  +        if (shutdownInProgress()) {
  +          log.debug("IOException while shutdown in progress ", ioe);
  +          throw new TabletClosedException(ioe); // assume IOException was caused by execution of HDFS shutdown hook
  +        }
  +
  +        sawException = true;
  +        dataSource.close(true);
  +        throw ioe;
  +      } catch (RuntimeException re) {
  +        sawException = true;
  +        throw re;
  +      } finally {
  +        // code in finally block because always want
  +        // to return mapfiles, even when exception is thrown
  +        if (!options.isolated)
  +          dataSource.close(false);
  +        else if (dataSource.fileManager != null)
  +          dataSource.fileManager.detach();
  +
  +        synchronized (Tablet.this) {
  +          if (results != null && results.results != null) {
  +            long more = results.results.size();
  +            queryCount += more;
  +            queryBytes += results.numBytes;
  +          }
  +        }
  +      }
  +    }
  +
  +    // close and read are synchronized because can not call close on the data source while it is in use
  +    // this cloud lead to the case where file iterators that are in use by a thread are returned
  +    // to the pool... this would be bad
  +    void close() {
  +      // set the interrupt flag outside the lock so an in-progress read() can observe it and exit
  +      options.interruptFlag.set(true);
  +      synchronized (this) {
  +        scanClosed = true;
  +        if (isolatedDataSource != null)
  +          isolatedDataSource.close(false);
  +      }
  +    }
  +  }
 +
  +  /** Immutable-by-convention bundle of the parameters that configure a scan. */
  +  static class ScanOptions {
  +
  +    // scan options
  +    Authorizations authorizations;
  +    byte[] defaultLabels;
  +    Set<Column> columnSet;
  +    List<IterInfo> ssiList;
  +    Map<String,Map<String,String>> ssio;
  +    AtomicBoolean interruptFlag;
  +    // max entries per batch; -1 means unlimited (batch lookups)
  +    int num;
  +    boolean isolated;
  +
  +    ScanOptions(int num, Authorizations authorizations, byte[] defaultLabels, Set<Column> columnSet, List<IterInfo> ssiList,
  +        Map<String,Map<String,String>> ssio, AtomicBoolean interruptFlag, boolean isolated) {
  +      this.num = num;
  +      this.authorizations = authorizations;
  +      this.defaultLabels = defaultLabels;
  +      this.columnSet = columnSet;
  +      this.ssiList = ssiList;
  +      this.ssio = ssio;
  +      this.interruptFlag = interruptFlag;
  +      this.isolated = isolated;
  +    }
  +
  +  }
 +
  +  /**
  +   * DataSource for SourceSwitchingIterator that reserves tablet memory iterators and datafiles,
  +   * builds the full scan iterator stack, and releases the reservations on close. Becomes
  +   * non-current whenever the tablet's file set changes (tracked via dataSourceDeletions).
  +   */
  +  class ScanDataSource implements DataSource {
  +
  +    // data source state
  +    private ScanFileManager fileManager;
  +    private SortedKeyValueIterator<Key,Value> iter;
  +    // snapshot of dataSourceDeletions; mismatch means files changed and we must switch sources
  +    private long expectedDeletionCount;
  +    private List<MemoryIterator> memIters = null;
  +    private long fileReservationId;
  +    private AtomicBoolean interruptFlag;
  +    private StatsIterator statsIterator;
  +
  +    ScanOptions options;
  +
  +    ScanDataSource(Authorizations authorizations, byte[] defaultLabels, HashSet<Column> columnSet, List<IterInfo> ssiList, Map<String,Map<String,String>> ssio,
  +        AtomicBoolean interruptFlag) {
  +      expectedDeletionCount = dataSourceDeletions.get();
  +      this.options = new ScanOptions(-1, authorizations, defaultLabels, columnSet, ssiList, ssio, interruptFlag, false);
  +      this.interruptFlag = interruptFlag;
  +    }
  +
  +    ScanDataSource(ScanOptions options) {
  +      expectedDeletionCount = dataSourceDeletions.get();
  +      this.options = options;
  +      this.interruptFlag = options.interruptFlag;
  +    }
  +
  +    @Override
  +    public DataSource getNewDataSource() {
  +      if (!isCurrent()) {
  +        // log.debug("Switching data sources during a scan");
  +        // release stale reservations; a fresh iterator will re-reserve on next use
  +        if (memIters != null) {
  +          tabletMemory.returnIterators(memIters);
  +          memIters = null;
  +          datafileManager.returnFilesForScan(fileReservationId);
  +          fileReservationId = -1;
  +        }
  +
  +        if (fileManager != null)
  +          fileManager.releaseOpenFiles(false);
  +
  +        expectedDeletionCount = dataSourceDeletions.get();
  +        iter = null;
  +
  +        return this;
  +      } else
  +        return this;
  +    }
  +
  +    @Override
  +    public boolean isCurrent() {
  +      return expectedDeletionCount == dataSourceDeletions.get();
  +    }
  +
  +    @Override
  +    public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
  +      if (iter == null)
  +        iter = createIterator();
  +      return iter;
  +    }
  +
  +    private SortedKeyValueIterator<Key,Value> createIterator() throws IOException {
  +
  +      Map<FileRef,DataFileValue> files;
  +
  +      // reserve memory iterators and datafiles under the tablet lock so the set is consistent
  +      synchronized (Tablet.this) {
  +
  +        if (memIters != null)
  +          throw new IllegalStateException("Tried to create new scan iterator w/o releasing memory");
  +
  +        if (Tablet.this.closed)
  +          throw new TabletClosedException();
  +
  +        if (interruptFlag.get())
  +          throw new IterationInterruptedException(extent.toString() + " " + interruptFlag.hashCode());
  +
  +        // only acquire the file manager when we know the tablet is open
  +        if (fileManager == null) {
  +          fileManager = tabletResources.newScanFileManager();
  +          activeScans.add(this);
  +        }
  +
  +        if (fileManager.getNumOpenFiles() != 0)
  +          throw new IllegalStateException("Tried to create new scan iterator w/o releasing files");
  +
  +        // set this before trying to get iterators in case
  +        // getIterators() throws an exception
  +        expectedDeletionCount = dataSourceDeletions.get();
  +
  +        memIters = tabletMemory.getIterators();
  +        Pair<Long,Map<FileRef,DataFileValue>> reservation = datafileManager.reserveFilesForScan();
  +        fileReservationId = reservation.getFirst();
  +        files = reservation.getSecond();
  +      }
  +
  +      Collection<InterruptibleIterator> mapfiles = fileManager.openFiles(files, options.isolated);
  +
  +      List<SortedKeyValueIterator<Key,Value>> iters = new ArrayList<SortedKeyValueIterator<Key,Value>>(mapfiles.size() + memIters.size());
  +
  +      iters.addAll(mapfiles);
  +      iters.addAll(memIters);
  +
  +      for (SortedKeyValueIterator<Key,Value> skvi : iters)
  +        ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
  +
  +      // iterator stack, bottom to top: merge -> stats -> delete -> cf skip -> cq filter -> visibility -> user iterators
  +      MultiIterator multiIter = new MultiIterator(iters, extent);
  +
  +      TabletIteratorEnvironment iterEnv = new TabletIteratorEnvironment(IteratorScope.scan, acuTableConf, fileManager, files);
  +
  +      statsIterator = new StatsIterator(multiIter, TabletServer.seekCount, scannedCount);
  +
  +      DeletingIterator delIter = new DeletingIterator(statsIterator, false);
  +
  +      ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);
  +
  +      ColumnQualifierFilter colFilter = new ColumnQualifierFilter(cfsi, options.columnSet);
  +
  +      VisibilityFilter visFilter = new VisibilityFilter(colFilter, options.authorizations, options.defaultLabels);
  +
  +      return iterEnv.getTopLevelIterator(IteratorUtil
  +          .loadIterators(IteratorScope.scan, visFilter, extent, acuTableConf, options.ssiList, options.ssio, iterEnv));
  +    }
  +
  +    private void close(boolean sawErrors) {
  +
  +      if (memIters != null) {
  +        tabletMemory.returnIterators(memIters);
  +        memIters = null;
  +        datafileManager.returnFilesForScan(fileReservationId);
  +        fileReservationId = -1;
  +      }
  +
  +      synchronized (Tablet.this) {
  +        activeScans.remove(this);
  +        // wake up anything (e.g. tablet close) waiting for all scans to finish
  +        if (activeScans.size() == 0)
  +          Tablet.this.notifyAll();
  +      }
  +
  +      if (fileManager != null) {
  +        fileManager.releaseOpenFiles(sawErrors);
  +        fileManager = null;
  +      }
  +
  +      if (statsIterator != null) {
  +        statsIterator.report();
  +      }
  +
  +    }
  +
  +    public void interrupt() {
  +      interruptFlag.set(true);
  +    }
  +
  +    @Override
  +    public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
  +      throw new UnsupportedOperationException();
  +    }
  +
  ++    // deep copies are unsupported at this level, so an interrupt flag is never pushed down here
  ++    @Override
  ++    public void setInterruptFlag(AtomicBoolean flag) {
  ++      throw new UnsupportedOperationException();
  ++    }
  ++
  +  }
 +
  +  /**
  +   * Writes an in-memory map to a new datafile (optionally merging an existing file), brings the
  +   * result online, and updates minor compaction metrics/timers. Always finalizes tablet memory,
  +   * even on failure.
  +   */
  +  private DataFileValue minorCompact(Configuration conf, VolumeManager fs, InMemoryMap memTable, FileRef tmpDatafile, FileRef newDatafile, FileRef mergeFile,
  +      boolean hasQueueTime, long queued, CommitSession commitSession, long flushId, MinorCompactionReason mincReason) {
  +    boolean failed = false;
  +    long start = System.currentTimeMillis();
  +    timer.incrementStatusMinor();
  +
  +    long count = 0;
  +
  +    try {
  +      Span span = Trace.start("write");
  +      CompactionStats stats;
  +      try {
  +        count = memTable.getNumEntries();
  +
  +        DataFileValue dfv = null;
  +        if (mergeFile != null)
  +          dfv = datafileManager.getDatafileSizes().get(mergeFile);
  +
  +        MinorCompactor compactor = new MinorCompactor(conf, fs, memTable, mergeFile, dfv, tmpDatafile, acuTableConf, extent, mincReason);
  +        stats = compactor.call();
  +      } finally {
  +        span.stop();
  +      }
  +      span = Trace.start("bringOnline");
  +      try {
  +        datafileManager.bringMinorCompactionOnline(tmpDatafile, newDatafile, mergeFile, new DataFileValue(stats.getFileSize(), stats.getEntriesWritten()),
  +            commitSession, flushId);
  +      } finally {
  +        span.stop();
  +      }
  +      return new DataFileValue(stats.getFileSize(), stats.getEntriesWritten());
  +    } catch (Exception E) {
  +      failed = true;
  +      throw new RuntimeException(E);
  +    } catch (Error E) {
  +      // Weird errors like "OutOfMemoryError" when trying to create the thread for the compaction
  +      failed = true;
  +      throw new RuntimeException(E);
  +    } finally {
  +      try {
  +        tabletMemory.finalizeMinC();
  +      } catch (Throwable t) {
  +        log.error("Failed to free tablet memory", t);
  +      }
  +
  +      if (!failed) {
  +        lastMinorCompactionFinishTime = System.currentTimeMillis();
  +      }
  +      if (tabletServer.mincMetrics.isEnabled())
  +        tabletServer.mincMetrics.add(TabletServerMinCMetrics.minc, (lastMinorCompactionFinishTime - start));
  +      if (hasQueueTime) {
  +        timer.updateTime(Operation.MINOR, queued, start, count, failed);
  +        if (tabletServer.mincMetrics.isEnabled())
  +          tabletServer.mincMetrics.add(TabletServerMinCMetrics.queue, (start - queued));
  +      } else
  +        timer.updateTime(Operation.MINOR, start, count, failed);
  +    }
  +  }
 +
 +  /**
 +   * Runnable that performs a single minor compaction for this tablet: waits for in-flight
 +   * commits to finish, logs the compaction start event to the write-ahead log (retrying until
 +   * it succeeds), runs the compaction, and then checks whether a split or major compaction
 +   * should follow.
 +   */
 +  private class MinorCompactionTask implements Runnable {
 +
 +    private long queued;
 +    private CommitSession commitSession;
 +    private DataFileValue stats;
 +    private FileRef mergeFile;
 +    private long flushId;
 +    private MinorCompactionReason mincReason;
 +
 +    MinorCompactionTask(FileRef mergeFile, CommitSession commitSession, long flushId, MinorCompactionReason mincReason) {
 +      queued = System.currentTimeMillis();
 +      minorCompactionWaitingToStart = true;
 +      this.commitSession = commitSession;
 +      this.mergeFile = mergeFile;
 +      this.flushId = flushId;
 +      this.mincReason = mincReason;
 +    }
 +
 +    @Override
 +    public void run() {
 +      minorCompactionWaitingToStart = false;
 +      minorCompactionInProgress = true;
 +      Span minorCompaction = Trace.on("minorCompaction");
 +      try {
 +        // "F" = flush (no merge file), "M" = merging minor compaction
 +        FileRef newMapfileLocation = getNextMapFilename(mergeFile == null ? "F" : "M");
 +        FileRef tmpFileRef = new FileRef(newMapfileLocation.path() + "_tmp");
 +        Span span = Trace.start("waitForCommits");
 +        synchronized (Tablet.this) {
 +          commitSession.waitForCommitsToFinish();
 +        }
 +        span.stop();
 +        span = Trace.start("start");
 +        while (true) {
 +          try {
 +            // the purpose of the minor compaction start event is to keep track of the filename... in the case
 +            // where the metadata table write for the minor compaction finishes and the process dies before
 +            // writing the minor compaction finish event, then the start event+filename in metadata table will
 +            // prevent recovery of duplicate data... the minor compaction start event could be written at any time
 +            // before the metadata write for the minor compaction
 +            tabletServer.minorCompactionStarted(commitSession, commitSession.getWALogSeq() + 1, newMapfileLocation.path().toString());
 +            break;
 +          } catch (IOException e) {
 +            log.warn("Failed to write to write ahead log " + e.getMessage(), e);
 +          }
 +        }
 +        span.stop();
 +        span = Trace.start("compact");
 +        this.stats = minorCompact(conf, fs, tabletMemory.getMinCMemTable(), tmpFileRef, newMapfileLocation, mergeFile, true, queued, commitSession, flushId,
 +            mincReason);
 +        span.stop();
 +
 +        if (needsSplit()) {
 +          tabletServer.executeSplit(Tablet.this);
 +        } else {
 +          initiateMajorCompaction(MajorCompactionReason.NORMAL);
 +        }
 +      } catch (Throwable t) {
 +        log.error("Unknown error during minor compaction for extent: " + getExtent(), t);
 +        throw new RuntimeException(t);
 +      } finally {
 +        minorCompactionInProgress = false;
 +        minorCompaction.data("extent", extent.toString());
 +        // stats is only assigned when minorCompact() succeeds; guard against an NPE being
 +        // thrown from this finally block, which would mask the original failure rethrown above
 +        if (this.stats != null) {
 +          minorCompaction.data("numEntries", Long.toString(this.stats.getNumEntries()));
 +          minorCompaction.data("size", Long.toString(this.stats.getSize()));
 +        }
 +        minorCompaction.stop();
 +      }
 +    }
 +  }
 +
 +  /**
 +   * Prepares for a minor compaction while holding the tablet monitor (method is synchronized):
 +   * reserves the in-memory map, retires the current write-ahead log set, and (except during
 +   * recovery) reserves a file to merge into. Returns the task to run; does not run it.
 +   */
 +  private synchronized MinorCompactionTask prepareForMinC(long flushId, MinorCompactionReason mincReason) {
 +    CommitSession oldCommitSession = tabletMemory.prepareForMinC();
 +    // retire the current log set; subsequent mutations go to a fresh set of logs
 +    otherLogs = currentLogs;
 +    currentLogs = new HashSet<DfsLogger>();
 +
 +    FileRef mergeFile = null;
 +    if (mincReason != MinorCompactionReason.RECOVERY) {
 +      mergeFile = datafileManager.reserveMergingMinorCompactionFile();
 +    }
 +
 +    return new MinorCompactionTask(mergeFile, oldCommitSession, flushId, mincReason);
 +
 +  }
 +
 +  /**
 +   * Flushes this tablet for the given table flush id. If the in-memory map is empty, only the
 +   * flush id recorded in the metadata table is updated; otherwise a minor compaction is
 +   * initiated. No-op when a flush id update is already in progress, the id is not newer than
 +   * the last one seen, the tablet is closing/closed, or memory is already reserved for a minor
 +   * compaction.
 +   */
 +  void flush(long tableFlushID) {
 +    boolean updateMetadata = false;
 +    boolean initiateMinor = false;
 +
 +    try {
 +
 +      synchronized (this) {
 +
 +        // only want one thing at a time to update flush ID to ensure that metadata table and tablet in memory state are consistent
 +        if (updatingFlushID)
 +          return;
 +
 +        if (lastFlushID >= tableFlushID)
 +          return;
 +
 +        if (closing || closed || tabletMemory.memoryReservedForMinC())
 +          return;
 +
 +        // nothing in memory to compact, so just record the flush id
 +        if (tabletMemory.getMemTable().getNumEntries() == 0) {
 +          lastFlushID = tableFlushID;
 +          updatingFlushID = true;
 +          updateMetadata = true;
 +        } else
 +          initiateMinor = true;
 +      }
 +
 +      if (updateMetadata) {
 +        Credentials creds = SystemCredentials.get();
 +        // if multiple threads were allowed to update this outside of a sync block, then it would be
 +        // a race condition
 +        MetadataTableUtil.updateTabletFlushID(extent, tableFlushID, creds, tabletServer.getLock());
 +      } else if (initiateMinor)
 +        initiateMinorCompaction(tableFlushID, MinorCompactionReason.USER);
 +
 +    } finally {
 +      if (updateMetadata) {
 +        synchronized (this) {
 +          updatingFlushID = false;
 +          // wake any threads waiting for the flush id update to finish
 +          this.notifyAll();
 +        }
 +      }
 +    }
 +
 +  }
 +
 +  /**
 +   * Initiates a minor compaction using the current flush id from ZooKeeper.
 +   *
 +   * @return true if a compaction task was queued, false otherwise
 +   */
 +  boolean initiateMinorCompaction(MinorCompactionReason mincReason) {
 +    if (isClosed()) {
 +      // don't bother trying to get flush id if closed... could be closed after this check but that is ok... just trying to cut down on unneeded log messages....
 +      return false;
 +    }
 +
 +    // get the flush id before the new memmap is made available for write
 +    long flushId;
 +    try {
 +      flushId = getFlushID();
 +    } catch (NoNodeException e) {
 +      log.info("Asked to initiate MinC when there was no flush id " + getExtent() + " " + e.getMessage());
 +      return false;
 +    }
 +    return initiateMinorCompaction(flushId, mincReason);
 +  }
 +
 +  /**
 +   * Runs a minor compaction synchronously on the calling thread, rather than queuing it on an
 +   * executor.
 +   *
 +   * @return true if a compaction ran, false if no flush id was available or no task could be
 +   *         created
 +   */
 +  boolean minorCompactNow(MinorCompactionReason mincReason) {
 +    long flushId;
 +    try {
 +      flushId = getFlushID();
 +    } catch (NoNodeException e) {
 +      log.info("Asked to initiate MinC when there was no flush id " + getExtent() + " " + e.getMessage());
 +      return false;
 +    }
 +    MinorCompactionTask mct = createMinorCompactionTask(flushId, mincReason);
 +    if (mct == null)
 +      return false;
 +    mct.run();
 +    return true;
 +  }
 +
 +  /**
 +   * Creates a minor compaction task for the given flush id and queues it on the tablet's
 +   * minor compaction executor.
 +   *
 +   * @return true if a task was queued, false if none could be created
 +   */
 +  boolean initiateMinorCompaction(long flushId, MinorCompactionReason mincReason) {
 +    MinorCompactionTask mct = createMinorCompactionTask(flushId, mincReason);
 +    if (mct == null)
 +      return false;
 +    tabletResources.executeMinorCompaction(mct);
 +    return true;
 +  }
 +
 +  /**
 +   * Prepares a minor compaction task under the tablet monitor. Returns null (logging the
 +   * reason at debug) when the tablet is closing/closed, a major compaction is waiting to
 +   * start, memory is already reserved for a minor compaction, the memory map is empty, or a
 +   * flush id update is in progress.
 +   */
 +  private MinorCompactionTask createMinorCompactionTask(long flushId, MinorCompactionReason mincReason) {
 +    MinorCompactionTask mct;
 +    long t1, t2;
 +
 +    // built while holding the lock, but logged outside it (see finally below)
 +    StringBuilder logMessage = null;
 +
 +    try {
 +      synchronized (this) {
 +        t1 = System.currentTimeMillis();
 +
 +        if (closing || closed || majorCompactionWaitingToStart || tabletMemory.memoryReservedForMinC() || tabletMemory.getMemTable().getNumEntries() == 0
 +            || updatingFlushID) {
 +
 +          logMessage = new StringBuilder();
 +
 +          logMessage.append(extent.toString());
 +          logMessage.append(" closing " + closing);
 +          logMessage.append(" closed " + closed);
 +          logMessage.append(" majorCompactionWaitingToStart " + majorCompactionWaitingToStart);
 +          if (tabletMemory != null)
 +            logMessage.append(" tabletMemory.memoryReservedForMinC() " + tabletMemory.memoryReservedForMinC());
 +          if (tabletMemory != null && tabletMemory.getMemTable() != null)
 +            logMessage.append(" tabletMemory.getMemTable().getNumEntries() " + tabletMemory.getMemTable().getNumEntries());
 +          logMessage.append(" updatingFlushID " + updatingFlushID);
 +
 +          return null;
 +        }
 +
 +        mct = prepareForMinC(flushId, mincReason);
 +        t2 = System.currentTimeMillis();
 +      }
 +    } finally {
 +      // log outside of sync block
 +      if (logMessage != null && log.isDebugEnabled())
 +        log.debug(logMessage);
 +    }
 +
 +    log.debug(String.format("MinC initiate lock %.2f secs", (t2 - t1) / 1000.0));
 +    return mct;
 +  }
 +
 +  /**
 +   * Reads this table's flush id from its ZooKeeper node.
 +   *
 +   * @throws NoNodeException
 +   *           if the flush id node does not exist in ZooKeeper; other ZooKeeper/parse failures
 +   *           are rethrown as RuntimeException
 +   */
 +  long getFlushID() throws NoNodeException {
 +    try {
 +      String zTablePath = Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID() + Constants.ZTABLES + "/" + extent.getTableId()
 +          + Constants.ZTABLE_FLUSH_ID;
 +      return Long.parseLong(new String(ZooReaderWriter.getRetryingInstance().getData(zTablePath, null), Constants.UTF8));
 +    } catch (InterruptedException e) {
 +      throw new RuntimeException(e);
 +    } catch (NumberFormatException nfe) {
 +      throw new RuntimeException(nfe);
 +    } catch (KeeperException ke) {
 +      // only NoNodeException is part of this method's contract; wrap everything else
 +      if (ke instanceof NoNodeException) {
 +        throw (NoNodeException) ke;
 +      } else {
 +        throw new RuntimeException(ke);
 +      }
 +    }
 +  }
 +
 +  /**
 +   * Reads this table's compaction cancel id from its ZooKeeper node. ZooKeeper and interrupt
 +   * failures are rethrown as RuntimeException.
 +   */
 +  long getCompactionCancelID() {
 +    String zTablePath = Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID() + Constants.ZTABLES + "/" + extent.getTableId()
 +        + Constants.ZTABLE_COMPACT_CANCEL_ID;
 +
 +    try {
 +      return Long.parseLong(new String(ZooReaderWriter.getRetryingInstance().getData(zTablePath, null), Constants.UTF8));
 +    } catch (KeeperException e) {
 +      throw new RuntimeException(e);
 +    } catch (InterruptedException e) {
 +      throw new RuntimeException(e);
 +    }
 +  }
 +
 +  Pair<Long,List<IteratorSetting>> getCompactionID() throws NoNodeException {
 +    try {
 +      String zTablePath = Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID() + Constants.ZTABLES + "/" + extent.getTableId()
 +          + Constants.ZTABLE_COMP

<TRUNCATED>

[06/10] git commit: Merge branch '1.5.2-SNAPSHOT' into 1.6.1-SNAPSHOT

Posted by el...@apache.org.
Merge branch '1.5.2-SNAPSHOT' into 1.6.1-SNAPSHOT

Conflicts:
	server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
	server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/d54e0fd8
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/d54e0fd8
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/d54e0fd8

Branch: refs/heads/master
Commit: d54e0fd8636405b39a982a6fad5a3fca1593d6cf
Parents: 0d76cd5 7699e1f
Author: Josh Elser <el...@apache.org>
Authored: Thu Sep 11 17:42:01 2014 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Thu Sep 11 17:42:01 2014 -0700

----------------------------------------------------------------------
 .../system/SourceSwitchingIterator.java         | 20 ++++------
 .../system/SourceSwitchingIteratorTest.java     | 38 +++++++++++++++++-
 .../apache/accumulo/tserver/FileManager.java    | 13 +++++++
 .../apache/accumulo/tserver/InMemoryMap.java    | 21 +++++++---
 .../org/apache/accumulo/tserver/Tablet.java     |  5 +++
 .../accumulo/tserver/InMemoryMapTest.java       | 41 ++++++++++++++++----
 6 files changed, 112 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/d54e0fd8/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
index 8bf2517,0000000..b82b9cc
mode 100644,000000..100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
@@@ -1,562 -1,0 +1,575 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.tserver;
 +
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.Collection;
 +import java.util.Collections;
 +import java.util.HashMap;
 +import java.util.Iterator;
 +import java.util.LinkedList;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Map.Entry;
 +import java.util.concurrent.Semaphore;
++import java.util.concurrent.atomic.AtomicBoolean;
 +
 +import org.apache.accumulo.core.conf.Property;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.KeyExtent;
 +import org.apache.accumulo.core.data.Value;
 +import org.apache.accumulo.core.file.FileOperations;
 +import org.apache.accumulo.core.file.FileSKVIterator;
 +import org.apache.accumulo.core.file.blockfile.cache.BlockCache;
 +import org.apache.accumulo.core.iterators.IteratorEnvironment;
 +import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 +import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
 +import org.apache.accumulo.core.iterators.system.TimeSettingIterator;
 +import org.apache.accumulo.core.metadata.schema.DataFileValue;
 +import org.apache.accumulo.server.conf.ServerConfiguration;
 +import org.apache.accumulo.server.fs.FileRef;
 +import org.apache.accumulo.server.fs.VolumeManager;
 +import org.apache.accumulo.server.problems.ProblemReport;
 +import org.apache.accumulo.server.problems.ProblemReportingIterator;
 +import org.apache.accumulo.server.problems.ProblemReports;
 +import org.apache.accumulo.server.problems.ProblemType;
 +import org.apache.accumulo.server.util.time.SimpleTimer;
 +import org.apache.hadoop.fs.FileSystem;
 +import org.apache.hadoop.fs.Path;
 +import org.apache.hadoop.io.Text;
 +import org.apache.log4j.Logger;
 +
 +public class FileManager {
 +  
 +  private static final Logger log = Logger.getLogger(FileManager.class);
 +  
 +  int maxOpen;
 +  
 +  private static class OpenReader implements Comparable<OpenReader> {
 +    long releaseTime;
 +    FileSKVIterator reader;
 +    String fileName;
 +    
 +    public OpenReader(String fileName, FileSKVIterator reader) {
 +      this.fileName = fileName;
 +      this.reader = reader;
 +      this.releaseTime = System.currentTimeMillis();
 +    }
 +    
 +    @Override
 +    public int compareTo(OpenReader o) {
 +      if (releaseTime < o.releaseTime) {
 +        return -1;
 +      } else if (releaseTime > o.releaseTime) {
 +        return 1;
 +      } else {
 +        return 0;
 +      }
 +    }
 +    
 +    @Override
 +    public boolean equals(Object obj) {
 +      if (obj instanceof OpenReader) {
 +        return compareTo((OpenReader) obj) == 0;
 +      }
 +      return false;
 +    }
 +    
 +    @Override
 +    public int hashCode() {
 +      return fileName.hashCode();
 +    }
 +  }
 +  
 +  private Map<String,List<OpenReader>> openFiles;
 +  private HashMap<FileSKVIterator,String> reservedReaders;
 +  
 +  private Semaphore filePermits;
 +  
 +  private VolumeManager fs;
 +  
 +  // the data cache and index cache are allocated in
 +  // TabletResourceManager and passed through the file opener to
 +  // CachableBlockFile which can handle the caches being
 +  // null if unallocated
 +  private BlockCache dataCache = null;
 +  private BlockCache indexCache = null;
 +  
 +  private long maxIdleTime;
 +  
 +  private final ServerConfiguration conf;
 +  
 +  private class IdleFileCloser implements Runnable {
 +    
 +    @Override
 +    public void run() {
 +      
 +      long curTime = System.currentTimeMillis();
 +      
 +      ArrayList<FileSKVIterator> filesToClose = new ArrayList<FileSKVIterator>();
 +      
 +      // determine which files to close in a sync block, and then close the
 +      // files outside of the sync block
 +      synchronized (FileManager.this) {
 +        Iterator<Entry<String,List<OpenReader>>> iter = openFiles.entrySet().iterator();
 +        while (iter.hasNext()) {
 +          Entry<String,List<OpenReader>> entry = iter.next();
 +          List<OpenReader> ofl = entry.getValue();
 +          
 +          for (Iterator<OpenReader> oflIter = ofl.iterator(); oflIter.hasNext();) {
 +            OpenReader openReader = oflIter.next();
 +            
 +            if (curTime - openReader.releaseTime > maxIdleTime) {
 +              
 +              filesToClose.add(openReader.reader);
 +              oflIter.remove();
 +            }
 +          }
 +          
 +          if (ofl.size() == 0) {
 +            iter.remove();
 +          }
 +        }
 +      }
 +      
 +      closeReaders(filesToClose);
 +      
 +    }
 +    
 +  }
 +  
 +  /**
 +   * 
 +   * @param dataCache
 +   *          : underlying file can and should be able to handle a null cache
 +   * @param indexCache
 +   *          : underlying file can and should be able to handle a null cache
 +   */
 +  FileManager(ServerConfiguration conf, VolumeManager fs, int maxOpen, BlockCache dataCache, BlockCache indexCache) {
 +    
 +    if (maxOpen <= 0)
 +      throw new IllegalArgumentException("maxOpen <= 0");
 +    this.conf = conf;
 +    this.dataCache = dataCache;
 +    this.indexCache = indexCache;
 +    
 +    this.filePermits = new Semaphore(maxOpen, true);
 +    this.maxOpen = maxOpen;
 +    this.fs = fs;
 +    
 +    this.openFiles = new HashMap<String,List<OpenReader>>();
 +    this.reservedReaders = new HashMap<FileSKVIterator,String>();
 +    
 +    this.maxIdleTime = conf.getConfiguration().getTimeInMillis(Property.TSERV_MAX_IDLE);
 +    SimpleTimer.getInstance().schedule(new IdleFileCloser(), maxIdleTime, maxIdleTime / 2);
 +    
 +  }
 +  
 +  private static int countReaders(Map<String,List<OpenReader>> files) {
 +    int count = 0;
 +    
 +    for (List<OpenReader> list : files.values()) {
 +      count += list.size();
 +    }
 +    
 +    return count;
 +  }
 +  
 +  private List<FileSKVIterator> takeLRUOpenFiles(int numToTake) {
 +    
 +    ArrayList<OpenReader> openReaders = new ArrayList<OpenReader>();
 +    
 +    for (Entry<String,List<OpenReader>> entry : openFiles.entrySet()) {
 +      openReaders.addAll(entry.getValue());
 +    }
 +    
 +    Collections.sort(openReaders);
 +    
 +    ArrayList<FileSKVIterator> ret = new ArrayList<FileSKVIterator>();
 +    
 +    for (int i = 0; i < numToTake; i++) {
 +      OpenReader or = openReaders.get(i);
 +      
 +      List<OpenReader> ofl = openFiles.get(or.fileName);
 +      if (!ofl.remove(or)) {
 +        throw new RuntimeException("Failed to remove open reader that should have been there");
 +      }
 +      
 +      if (ofl.size() == 0) {
 +        openFiles.remove(or.fileName);
 +      }
 +      
 +      ret.add(or.reader);
 +    }
 +    
 +    return ret;
 +  }
 +  
 +  private static <T> List<T> getFileList(String file, Map<String,List<T>> files) {
 +    List<T> ofl = files.get(file);
 +    if (ofl == null) {
 +      ofl = new ArrayList<T>();
 +      files.put(file, ofl);
 +    }
 +    
 +    return ofl;
 +  }
 +  
 +  private void closeReaders(List<FileSKVIterator> filesToClose) {
 +    for (FileSKVIterator reader : filesToClose) {
 +      try {
 +        reader.close();
 +      } catch (Exception e) {
 +        log.error("Failed to close file " + e.getMessage(), e);
 +      }
 +    }
 +  }
 +  
 +  private List<String> takeOpenFiles(Collection<String> files, List<FileSKVIterator> reservedFiles, Map<FileSKVIterator,String> readersReserved) {
 +    List<String> filesToOpen = new LinkedList<String>(files);
 +    for (Iterator<String> iterator = filesToOpen.iterator(); iterator.hasNext();) {
 +      String file = iterator.next();
 +      
 +      List<OpenReader> ofl = openFiles.get(file);
 +      if (ofl != null && ofl.size() > 0) {
 +        OpenReader openReader = ofl.remove(ofl.size() - 1);
 +        reservedFiles.add(openReader.reader);
 +        readersReserved.put(openReader.reader, file);
 +        if (ofl.size() == 0) {
 +          openFiles.remove(file);
 +        }
 +        iterator.remove();
 +      }
 +      
 +    }
 +    return filesToOpen;
 +  }
 +  
 +  private synchronized String getReservedReadeFilename(FileSKVIterator reader) {
 +    return reservedReaders.get(reader);
 +  }
 +  
 +  private List<FileSKVIterator> reserveReaders(Text table, Collection<String> files, boolean continueOnFailure) throws IOException {
 +    
 +    if (files.size() >= maxOpen) {
 +      throw new IllegalArgumentException("requested files exceeds max open");
 +    }
 +    
 +    if (files.size() == 0) {
 +      return Collections.emptyList();
 +    }
 +    
 +    List<String> filesToOpen = null;
 +    List<FileSKVIterator> filesToClose = Collections.emptyList();
 +    List<FileSKVIterator> reservedFiles = new ArrayList<FileSKVIterator>();
 +    Map<FileSKVIterator,String> readersReserved = new HashMap<FileSKVIterator,String>();
 +    
 +    filePermits.acquireUninterruptibly(files.size());
 +    
 +    // now that the we are past the semaphore, we have the authority
 +    // to open files.size() files
 +    
 +    // determine what work needs to be done in sync block
 +    // but do the work of opening and closing files outside
 +    // a synch block
 +    synchronized (this) {
 +      
 +      filesToOpen = takeOpenFiles(files, reservedFiles, readersReserved);
 +      
 +      int numOpen = countReaders(openFiles);
 +      
 +      if (filesToOpen.size() + numOpen + reservedReaders.size() > maxOpen) {
 +        filesToClose = takeLRUOpenFiles((filesToOpen.size() + numOpen + reservedReaders.size()) - maxOpen);
 +      }
 +    }
 +    
 +    // close files before opening files to ensure we stay under resource
 +    // limitations
 +    closeReaders(filesToClose);
 +    
 +    // open any files that need to be opened
 +    for (String file : filesToOpen) {
 +      try {
 +        if (!file.contains(":"))
 +          throw new IllegalArgumentException("Expected uri, got : " + file);
 +        Path path = new Path(file);
 +        FileSystem ns = fs.getVolumeByPath(path).getFileSystem();
 +        //log.debug("Opening "+file + " path " + path);
 +        FileSKVIterator reader = FileOperations.getInstance().openReader(path.toString(), false, ns, ns.getConf(), conf.getTableConfiguration(table.toString()),
 +            dataCache, indexCache);
 +        reservedFiles.add(reader);
 +        readersReserved.put(reader, file);
 +      } catch (Exception e) {
 +        
 +        ProblemReports.getInstance().report(new ProblemReport(table.toString(), ProblemType.FILE_READ, file, e));
 +        
 +        if (continueOnFailure) {
 +          // release the permit for the file that failed to open
 +          filePermits.release(1);
 +          log.warn("Failed to open file " + file + " " + e.getMessage() + " continuing...");
 +        } else {
 +          // close whatever files were opened
 +          closeReaders(reservedFiles);
 +          
 +          filePermits.release(files.size());
 +          
 +          log.error("Failed to open file " + file + " " + e.getMessage());
 +          throw new IOException("Failed to open " + file, e);
 +        }
 +      }
 +    }
 +    
 +    synchronized (this) {
 +      // update set of reserved readers
 +      reservedReaders.putAll(readersReserved);
 +    }
 +    
 +    return reservedFiles;
 +  }
 +  
 +  private void releaseReaders(List<FileSKVIterator> readers, boolean sawIOException) {
 +    // put files in openFiles
 +    
 +    synchronized (this) {
 +      
 +      // check that readers were actually reserved ... want to make sure a thread does
 +      // not try to release readers they never reserved
 +      if (!reservedReaders.keySet().containsAll(readers)) {
 +        throw new IllegalArgumentException("Asked to release readers that were never reserved ");
 +      }
 +      
 +      for (FileSKVIterator reader : readers) {
 +        try {
 +          reader.closeDeepCopies();
 +        } catch (IOException e) {
 +          log.warn(e, e);
 +          sawIOException = true;
 +        }
 +      }
 +      
 +      for (FileSKVIterator reader : readers) {
 +        String fileName = reservedReaders.remove(reader);
 +        if (!sawIOException)
 +          getFileList(fileName, openFiles).add(new OpenReader(fileName, reader));
 +      }
 +    }
 +    
 +    if (sawIOException)
 +      closeReaders(readers);
 +    
 +    // decrement the semaphore
 +    filePermits.release(readers.size());
 +    
 +  }
 +  
 +  static class FileDataSource implements DataSource {
 +    
 +    private SortedKeyValueIterator<Key,Value> iter;
 +    private ArrayList<FileDataSource> deepCopies;
 +    private boolean current = true;
 +    private IteratorEnvironment env;
 +    private String file;
++    private AtomicBoolean iflag;
 +    
 +    FileDataSource(String file, SortedKeyValueIterator<Key,Value> iter) {
 +      this.file = file;
 +      this.iter = iter;
 +      this.deepCopies = new ArrayList<FileManager.FileDataSource>();
 +    }
 +    
 +    public FileDataSource(IteratorEnvironment env, SortedKeyValueIterator<Key,Value> deepCopy, ArrayList<FileDataSource> deepCopies) {
 +      this.iter = deepCopy;
 +      this.env = env;
 +      this.deepCopies = deepCopies;
 +      deepCopies.add(this);
 +    }
 +    
 +    @Override
 +    public boolean isCurrent() {
 +      return current;
 +    }
 +    
 +    @Override
 +    public DataSource getNewDataSource() {
 +      current = true;
 +      return this;
 +    }
 +    
 +    @Override
 +    public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
 +      return new FileDataSource(env, iter.deepCopy(env), deepCopies);
 +    }
 +    
 +    @Override
 +    public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
++      if (iflag != null)
++        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
 +      return iter;
 +    }
 +    
 +    void unsetIterator() {
 +      current = false;
 +      iter = null;
 +      for (FileDataSource fds : deepCopies) {
 +        fds.current = false;
 +        fds.iter = null;
 +      }
 +    }
 +    
 +    void setIterator(SortedKeyValueIterator<Key,Value> iter) {
 +      current = false;
 +      this.iter = iter;
++
++      if (iflag != null)
++        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
++
 +      for (FileDataSource fds : deepCopies) {
 +        fds.current = false;
 +        fds.iter = iter.deepCopy(fds.env);
 +      }
 +    }
++
++    @Override
++    public void setInterruptFlag(AtomicBoolean flag) {
++      this.iflag = flag;
++    }
 +    
 +  }
 +  
 +  public class ScanFileManager {
 +    
 +    private ArrayList<FileDataSource> dataSources;
 +    private ArrayList<FileSKVIterator> tabletReservedReaders;
 +    private KeyExtent tablet;
 +    private boolean continueOnFailure;
 +    
 +    ScanFileManager(KeyExtent tablet) {
 +      tabletReservedReaders = new ArrayList<FileSKVIterator>();
 +      dataSources = new ArrayList<FileDataSource>();
 +      this.tablet = tablet;
 +      
 +      continueOnFailure = conf.getTableConfiguration(tablet).getBoolean(Property.TABLE_FAILURES_IGNORE);
 +      
 +      if (tablet.isMeta()) {
 +        continueOnFailure = false;
 +      }
 +    }
 +    
 +    private List<FileSKVIterator> openFileRefs(Collection<FileRef> files) throws TooManyFilesException, IOException {
 +      List<String> strings = new ArrayList<String>(files.size());
 +      for (FileRef ref : files)
 +        strings.add(ref.path().toString());
 +      return openFiles(strings);
 +    }
 +    
 +    private List<FileSKVIterator> openFiles(Collection<String> files) throws TooManyFilesException, IOException {
 +      // one tablet can not open more than maxOpen files, otherwise it could get stuck
 +      // forever waiting on itself to release files
 +      
 +      if (tabletReservedReaders.size() + files.size() >= maxOpen) {
 +        throw new TooManyFilesException("Request to open files would exceed max open files reservedReaders.size()=" + tabletReservedReaders.size()
 +            + " files.size()=" + files.size() + " maxOpen=" + maxOpen + " tablet = " + tablet);
 +      }
 +      
 +      List<FileSKVIterator> newlyReservedReaders = reserveReaders(tablet.getTableId(), files, continueOnFailure);
 +      
 +      tabletReservedReaders.addAll(newlyReservedReaders);
 +      return newlyReservedReaders;
 +    }
 +    
 +    synchronized List<InterruptibleIterator> openFiles(Map<FileRef,DataFileValue> files, boolean detachable) throws IOException {
 +      
 +      List<FileSKVIterator> newlyReservedReaders = openFileRefs(files.keySet());
 +      
 +      ArrayList<InterruptibleIterator> iters = new ArrayList<InterruptibleIterator>();
 +      
 +      for (FileSKVIterator reader : newlyReservedReaders) {
 +        String filename = getReservedReadeFilename(reader);
 +        InterruptibleIterator iter;
 +        if (detachable) {
 +          FileDataSource fds = new FileDataSource(filename, reader);
 +          dataSources.add(fds);
 +          SourceSwitchingIterator ssi = new SourceSwitchingIterator(fds);
 +          iter = new ProblemReportingIterator(tablet.getTableId().toString(), filename, continueOnFailure, ssi);
 +        } else {
 +          iter = new ProblemReportingIterator(tablet.getTableId().toString(), filename, continueOnFailure, reader);
 +        }
 +        DataFileValue value = files.get(new FileRef(filename));
 +        if (value.isTimeSet()) {
 +          iter = new TimeSettingIterator(iter, value.getTime());
 +        }
 +        
 +        iters.add(iter);
 +      }
 +      
 +      return iters;
 +    }
 +    
 +    synchronized void detach() {
 +      
 +      releaseReaders(tabletReservedReaders, false);
 +      tabletReservedReaders.clear();
 +      
 +      for (FileDataSource fds : dataSources)
 +        fds.unsetIterator();
 +    }
 +    
 +    synchronized void reattach() throws IOException {
 +      if (tabletReservedReaders.size() != 0)
 +        throw new IllegalStateException();
 +      
 +      Collection<String> files = new ArrayList<String>();
 +      for (FileDataSource fds : dataSources)
 +        files.add(fds.file);
 +      
 +      List<FileSKVIterator> newlyReservedReaders = openFiles(files);
 +      Map<String,List<FileSKVIterator>> map = new HashMap<String,List<FileSKVIterator>>();
 +      for (FileSKVIterator reader : newlyReservedReaders) {
 +        String fileName = getReservedReadeFilename(reader);
 +        List<FileSKVIterator> list = map.get(fileName);
 +        if (list == null) {
 +          list = new LinkedList<FileSKVIterator>();
 +          map.put(fileName, list);
 +        }
 +        
 +        list.add(reader);
 +      }
 +      
 +      for (FileDataSource fds : dataSources) {
 +        FileSKVIterator reader = map.get(fds.file).remove(0);
 +        fds.setIterator(reader);
 +      }
 +    }
 +    
 +    synchronized void releaseOpenFiles(boolean sawIOException) {
 +      releaseReaders(tabletReservedReaders, sawIOException);
 +      tabletReservedReaders.clear();
 +      dataSources.clear();
 +    }
 +    
 +    synchronized int getNumOpenFiles() {
 +      return tabletReservedReaders.size();
 +    }
 +  }
 +  
 +  public ScanFileManager newScanFileManager(KeyExtent tablet) {
 +    return new ScanFileManager(tablet);
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/d54e0fd8/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
index 5f6d9ce,0000000..2e15767
mode 100644,000000..100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
@@@ -1,772 -1,0 +1,783 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.tserver;
 +
 +import java.io.IOException;
 +import java.io.Serializable;
 +import java.util.ArrayList;
 +import java.util.Collection;
 +import java.util.Collections;
 +import java.util.Comparator;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.Iterator;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Map.Entry;
 +import java.util.Set;
 +import java.util.SortedMap;
 +import java.util.UUID;
 +import java.util.concurrent.ConcurrentSkipListMap;
 +import java.util.concurrent.atomic.AtomicBoolean;
 +import java.util.concurrent.atomic.AtomicInteger;
 +import java.util.concurrent.atomic.AtomicLong;
 +
 +import org.apache.accumulo.core.conf.AccumuloConfiguration;
 +import org.apache.accumulo.core.conf.Property;
 +import org.apache.accumulo.core.data.ByteSequence;
 +import org.apache.accumulo.core.data.ColumnUpdate;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.Mutation;
 +import org.apache.accumulo.core.data.Range;
 +import org.apache.accumulo.core.data.Value;
 +import org.apache.accumulo.core.file.FileSKVIterator;
 +import org.apache.accumulo.core.file.FileSKVWriter;
 +import org.apache.accumulo.core.file.rfile.RFile;
 +import org.apache.accumulo.core.file.rfile.RFileOperations;
 +import org.apache.accumulo.core.iterators.IteratorEnvironment;
 +import org.apache.accumulo.core.iterators.SkippingIterator;
 +import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 +import org.apache.accumulo.core.iterators.SortedMapIterator;
 +import org.apache.accumulo.core.iterators.WrappingIterator;
 +import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
 +import org.apache.accumulo.core.iterators.system.LocalityGroupIterator;
 +import org.apache.accumulo.core.iterators.system.LocalityGroupIterator.LocalityGroup;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
 +import org.apache.accumulo.core.util.CachedConfiguration;
 +import org.apache.accumulo.core.util.LocalityGroupUtil;
 +import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
 +import org.apache.accumulo.core.util.LocalityGroupUtil.Partitioner;
 +import org.apache.accumulo.core.util.UtilWaitThread;
 +import org.apache.accumulo.server.conf.ServerConfiguration;
 +import org.apache.accumulo.server.trace.TraceFileSystem;
 +import org.apache.commons.lang.mutable.MutableLong;
 +import org.apache.hadoop.conf.Configuration;
 +import org.apache.hadoop.fs.FileSystem;
 +import org.apache.hadoop.fs.Path;
 +import org.apache.log4j.Logger;
 +
 +class MemKeyComparator implements Comparator<Key>, Serializable {
 +  
 +  private static final long serialVersionUID = 1L;
 +
 +  @Override
 +  public int compare(Key k1, Key k2) {
 +    int cmp = k1.compareTo(k2);
 +    
 +    if (cmp == 0) {
 +      if (k1 instanceof MemKey)
 +        if (k2 instanceof MemKey)
 +          cmp = ((MemKey) k2).kvCount - ((MemKey) k1).kvCount;
 +        else
 +          cmp = 1;
 +      else if (k2 instanceof MemKey)
 +        cmp = -1;
 +    }
 +    
 +    return cmp;
 +  }
 +}
 +
 +class PartialMutationSkippingIterator extends SkippingIterator implements InterruptibleIterator {
 +  
 +  int kvCount;
 +  
 +  public PartialMutationSkippingIterator(SortedKeyValueIterator<Key,Value> source, int maxKVCount) {
 +    setSource(source);
 +    this.kvCount = maxKVCount;
 +  }
 +  
 +  @Override
 +  protected void consume() throws IOException {
 +    while (getSource().hasTop() && ((MemKey) getSource().getTopKey()).kvCount > kvCount)
 +      getSource().next();
 +  }
 +  
 +  @Override
 +  public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
 +    return new PartialMutationSkippingIterator(getSource().deepCopy(env), kvCount);
 +  }
 +  
 +  @Override
 +  public void setInterruptFlag(AtomicBoolean flag) {
 +    ((InterruptibleIterator) getSource()).setInterruptFlag(flag);
 +  }
 +  
 +}
 +
 +class MemKeyConversionIterator extends WrappingIterator implements InterruptibleIterator {
 +  MemKey currKey = null;
 +  Value currVal = null;
 +
 +  public MemKeyConversionIterator(SortedKeyValueIterator<Key,Value> source) {
 +    super();
 +    setSource(source);
 +  }
 +
 +  @Override
 +  public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
 +    return new MemKeyConversionIterator(getSource().deepCopy(env));
 +  }
 +  
 +  @Override
 +  public Key getTopKey() {
 +    return currKey;
 +  }
 +  
 +  @Override
 +  public Value getTopValue() {
 +    return currVal;
 +  }
 +  
 +  private void getTopKeyVal() {
 +    Key k = super.getTopKey();
 +    Value v = super.getTopValue();
 +    if (k instanceof MemKey || k == null) {
 +      currKey = (MemKey) k;
 +      currVal = v;
 +      return;
 +    }
 +    currVal = new Value(v);
 +    int mc = MemValue.splitKVCount(currVal);
 +    currKey = new MemKey(k, mc);
 +
 +  }
 +  
 +  public void next() throws IOException {
 +    super.next();
 +    if (hasTop())
 +      getTopKeyVal();
 +  }
 +
 +  public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
 +    super.seek(range, columnFamilies, inclusive);
 +    
 +    if (hasTop())
 +      getTopKeyVal();
 +
 +    Key k = range.getStartKey();
 +    if (k instanceof MemKey && hasTop()) {
 +      while (hasTop() && currKey.compareTo(k) < 0)
 +        next();
 +    }
 +  }
 +
 +  @Override
 +  public void setInterruptFlag(AtomicBoolean flag) {
 +    ((InterruptibleIterator) getSource()).setInterruptFlag(flag);
 +  }
 +
 +}
 +
 +public class InMemoryMap {
 +  private SimpleMap map = null;
 +  
 +  private static final Logger log = Logger.getLogger(InMemoryMap.class);
 +  
 +  private volatile String memDumpFile = null;
 +  private final String memDumpDir;
 +
 +  private Map<String,Set<ByteSequence>> lggroups;
 +  
 +  public InMemoryMap(boolean useNativeMap, String memDumpDir) {
 +    this(new HashMap<String,Set<ByteSequence>>(), useNativeMap, memDumpDir);
 +  }
 +
 +  public InMemoryMap(Map<String,Set<ByteSequence>> lggroups, boolean useNativeMap, String memDumpDir) {
 +    this.memDumpDir = memDumpDir;
 +    this.lggroups = lggroups;
 +    
 +    if (lggroups.size() == 0)
 +      map = newMap(useNativeMap);
 +    else
 +      map = new LocalityGroupMap(lggroups, useNativeMap);
 +  }
 +  
 +  public InMemoryMap(AccumuloConfiguration config) throws LocalityGroupConfigurationError {
 +    this(LocalityGroupUtil.getLocalityGroups(config), config.getBoolean(Property.TSERV_NATIVEMAP_ENABLED), config.get(Property.TSERV_MEMDUMP_DIR));
 +  }
 +  
 +  private static SimpleMap newMap(boolean useNativeMap) {
 +    if (useNativeMap && NativeMap.isLoaded()) {
 +      try {
 +        return new NativeMapWrapper();
 +      } catch (Throwable t) {
 +        log.error("Failed to create native map", t);
 +      }
 +    }
 +    
 +    return new DefaultMap();
 +  }
 +  
 +  private interface SimpleMap {
 +    Value get(Key key);
 +    
 +    Iterator<Entry<Key,Value>> iterator(Key startKey);
 +    
 +    int size();
 +    
 +    InterruptibleIterator skvIterator();
 +    
 +    void delete();
 +    
 +    long getMemoryUsed();
 +    
 +    void mutate(List<Mutation> mutations, int kvCount);
 +  }
 +  
 +  private static class LocalityGroupMap implements SimpleMap {
 +    
 +    private Map<ByteSequence,MutableLong> groupFams[];
 +    
 +    // the last map in the array is the default locality group
 +    private SimpleMap maps[];
 +    private Partitioner partitioner;
 +    private List<Mutation>[] partitioned;
 +    private Set<ByteSequence> nonDefaultColumnFamilies;
 +    
 +    @SuppressWarnings("unchecked")
 +    LocalityGroupMap(Map<String,Set<ByteSequence>> groups, boolean useNativeMap) {
 +      this.groupFams = new Map[groups.size()];
 +      this.maps = new SimpleMap[groups.size() + 1];
 +      this.partitioned = new List[groups.size() + 1];
 +      this.nonDefaultColumnFamilies = new HashSet<ByteSequence>();
 +      
 +      for (int i = 0; i < maps.length; i++) {
 +        maps[i] = newMap(useNativeMap);
 +      }
 +
 +      int count = 0;
 +      for (Set<ByteSequence> cfset : groups.values()) {
 +        HashMap<ByteSequence,MutableLong> map = new HashMap<ByteSequence,MutableLong>();
 +        for (ByteSequence bs : cfset)
 +          map.put(bs, new MutableLong(1));
 +        this.groupFams[count++] = map;
 +        nonDefaultColumnFamilies.addAll(cfset);
 +      }
 +      
 +      partitioner = new LocalityGroupUtil.Partitioner(this.groupFams);
 +      
 +      for (int i = 0; i < partitioned.length; i++) {
 +        partitioned[i] = new ArrayList<Mutation>();
 +      }
 +    }
 +
 +    @Override
 +    public Value get(Key key) {
 +      throw new UnsupportedOperationException();
 +    }
 +    
 +    @Override
 +    public Iterator<Entry<Key,Value>> iterator(Key startKey) {
 +      throw new UnsupportedOperationException();
 +    }
 +    
 +    @Override
 +    public int size() {
 +      int sum = 0;
 +      for (SimpleMap map : maps)
 +        sum += map.size();
 +      return sum;
 +    }
 +    
 +    @Override
 +    public InterruptibleIterator skvIterator() {
 +      LocalityGroup groups[] = new LocalityGroup[maps.length];
 +      for (int i = 0; i < groups.length; i++) {
 +        if (i < groupFams.length)
 +          groups[i] = new LocalityGroup(maps[i].skvIterator(), groupFams[i], false);
 +        else
 +          groups[i] = new LocalityGroup(maps[i].skvIterator(), null, true);
 +      }
 +
 +
 +      return new LocalityGroupIterator(groups, nonDefaultColumnFamilies);
 +    }
 +    
 +    @Override
 +    public void delete() {
 +      for (SimpleMap map : maps)
 +        map.delete();
 +    }
 +    
 +    @Override
 +    public long getMemoryUsed() {
 +      long sum = 0;
 +      for (SimpleMap map : maps)
 +        sum += map.getMemoryUsed();
 +      return sum;
 +    }
 +    
 +    @Override
 +    public synchronized void mutate(List<Mutation> mutations, int kvCount) {
 +      // this method is synchronized because it reuses objects to avoid allocation.
 +      // Currently, the method that calls this is synchronized, so there is no
 +      // loss in parallelism; synchronization was added here for future-proofing.
 +      
 +      try{
 +        partitioner.partition(mutations, partitioned);
 +        
 +        for (int i = 0; i < partitioned.length; i++) {
 +          if (partitioned[i].size() > 0) {
 +            maps[i].mutate(partitioned[i], kvCount);
 +            for (Mutation m : partitioned[i])
 +              kvCount += m.getUpdates().size();
 +          }
 +        }
 +      } finally {
 +        // clear immediately so mutations can be garbage collected
 +        for (List<Mutation> list : partitioned) {
 +          list.clear();
 +        }
 +      }
 +    }
 +    
 +  }
 +
 +  private static class DefaultMap implements SimpleMap {
 +    private ConcurrentSkipListMap<Key,Value> map = new ConcurrentSkipListMap<Key,Value>(new MemKeyComparator());
 +    private AtomicLong bytesInMemory = new AtomicLong();
 +    private AtomicInteger size = new AtomicInteger();
 +    
 +    public void put(Key key, Value value) {
 +      // Always a MemKey, so account for the kvCount int
 +      bytesInMemory.addAndGet(key.getLength() + 4);
 +      bytesInMemory.addAndGet(value.getSize());
 +      if (map.put(key, value) == null)
 +        size.incrementAndGet();
 +    }
 +    
 +    public Value get(Key key) {
 +      return map.get(key);
 +    }
 +    
 +    public Iterator<Entry<Key,Value>> iterator(Key startKey) {
 +      Key lk = new Key(startKey);
 +      SortedMap<Key,Value> tm = map.tailMap(lk);
 +      return tm.entrySet().iterator();
 +    }
 +    
 +    public int size() {
 +      return size.get();
 +    }
 +    
 +    public synchronized InterruptibleIterator skvIterator() {
 +      if (map == null)
 +        throw new IllegalStateException();
 +      
 +      return new SortedMapIterator(map);
 +    }
 +    
 +    public synchronized void delete() {
 +      map = null;
 +    }
 +    
 +    public long getOverheadPerEntry() {
 +      // all of the java objects that are used to hold the
 +      // data and make it searchable have overhead... this
 +      // overhead is estimated using test.EstimateInMemMapOverhead
 +      // and is in bytes.. the estimates were obtained by running
 +      // java 6_16 in 64 bit server mode
 +      
 +      return 200;
 +    }
 +    
 +    @Override
 +    public void mutate(List<Mutation> mutations, int kvCount) {
 +      for (Mutation m : mutations) {
 +        for (ColumnUpdate cvp : m.getUpdates()) {
 +          Key newKey = new MemKey(m.getRow(), cvp.getColumnFamily(), cvp.getColumnQualifier(), cvp.getColumnVisibility(), cvp.getTimestamp(), cvp.isDeleted(),
 +              false, kvCount++);
 +          Value value = new Value(cvp.getValue());
 +          put(newKey, value);
 +        }
 +      }
 +    }
 +    
 +    @Override
 +    public long getMemoryUsed() {
 +      return bytesInMemory.get() + (size() * getOverheadPerEntry());
 +    }
 +  }
 +  
 +  private static class NativeMapWrapper implements SimpleMap {
 +    private NativeMap nativeMap;
 +    
 +    NativeMapWrapper() {
 +      nativeMap = new NativeMap();
 +    }
 +    
 +    public Value get(Key key) {
 +      return nativeMap.get(key);
 +    }
 +    
 +    public Iterator<Entry<Key,Value>> iterator(Key startKey) {
 +      return nativeMap.iterator(startKey);
 +    }
 +    
 +    public int size() {
 +      return nativeMap.size();
 +    }
 +    
 +    public InterruptibleIterator skvIterator() {
 +      return (InterruptibleIterator) nativeMap.skvIterator();
 +    }
 +    
 +    public void delete() {
 +      nativeMap.delete();
 +    }
 +    
 +    public long getMemoryUsed() {
 +      return nativeMap.getMemoryUsed();
 +    }
 +    
 +    @Override
 +    public void mutate(List<Mutation> mutations, int kvCount) {
 +      nativeMap.mutate(mutations, kvCount);
 +    }
 +  }
 +  
 +  private AtomicInteger nextKVCount = new AtomicInteger(1);
 +  private AtomicInteger kvCount = new AtomicInteger(0);
 +
 +  private Object writeSerializer = new Object();
 +  
 +  /**
 +   * Applies changes to a row in the InMemoryMap
 +   * 
 +   */
 +  public void mutate(List<Mutation> mutations) {
 +    int numKVs = 0;
 +    for (int i = 0; i < mutations.size(); i++)
 +      numKVs += mutations.get(i).size();
 +    
 +    // Can not update mutationCount while writes that started before
 +    // are in progress, this would cause partial mutations to be seen.
 +    // Also, can not continue until mutation count is updated, because
 +    // a read may not see a successful write. Therefore writes must
 +    // wait for writes that started before to finish.
 +    //
 +    // using separate lock from this map, to allow read/write in parallel
 +    synchronized (writeSerializer ) {
 +      int kv = nextKVCount.getAndAdd(numKVs);
 +      try {
 +        map.mutate(mutations, kv);
 +      } finally {
 +        kvCount.set(kv + numKVs - 1);
 +      }
 +    }
 +  }
 +  
 +  /**
 +   * Returns a long representing the size of the InMemoryMap
 +   * 
 +   * @return bytesInMemory
 +   */
 +  public synchronized long estimatedSizeInBytes() {
 +    if (map == null)
 +      return 0;
 +    
 +    return map.getMemoryUsed();
 +  }
 +  
 +  Iterator<Map.Entry<Key,Value>> iterator(Key startKey) {
 +    return map.iterator(startKey);
 +  }
 +  
 +  public long getNumEntries() {
 +    return map.size();
 +  }
 +  
 +  private final Set<MemoryIterator> activeIters = Collections.synchronizedSet(new HashSet<MemoryIterator>());
 +  
 +  class MemoryDataSource implements DataSource {
 +    
 +    boolean switched = false;
 +    private InterruptibleIterator iter;
 +    private FileSKVIterator reader;
 +    private MemoryDataSource parent;
 +    private IteratorEnvironment env;
++    private AtomicBoolean iflag;
 +    
 +    MemoryDataSource() {
-       this(null, false, null);
++      this(null, false, null, null);
 +    }
 +    
-     public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env) {
++    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env, AtomicBoolean iflag) {
 +      this.parent = parent;
 +      this.switched = switched;
 +      this.env = env;
++      this.iflag = iflag;
 +    }
 +    
 +    @Override
 +    public boolean isCurrent() {
 +      if (switched)
 +        return true;
 +      else
 +        return memDumpFile == null;
 +    }
 +    
 +    @Override
 +    public DataSource getNewDataSource() {
 +      if (switched)
 +        throw new IllegalStateException();
 +      
 +      if (!isCurrent()) {
 +        switched = true;
 +        iter = null;
 +        try {
 +          // ensure files are referenced even if iterator was never seeked before
 +          iterator();
 +        } catch (IOException e) {
 +          throw new RuntimeException();
 +        }
 +      }
 +      
 +      return this;
 +    }
 +    
 +    private synchronized FileSKVIterator getReader() throws IOException {
 +      if (reader == null) {
 +        Configuration conf = CachedConfiguration.getInstance();
 +        FileSystem fs = TraceFileSystem.wrap(FileSystem.getLocal(conf));
 +        
 +        reader = new RFileOperations().openReader(memDumpFile, true, fs, conf, ServerConfiguration.getSiteConfiguration());
++        if (iflag != null)
++          reader.setInterruptFlag(iflag);
 +      }
 +
 +      return reader;
 +    }
 +
 +    @Override
 +    public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
 +      if (iter == null)
-         if (!switched)
++        if (!switched) {
 +          iter = map.skvIterator();
-         else {
++          if (iflag != null)
++            iter.setInterruptFlag(iflag);
++        } else {
 +          if (parent == null)
 +            iter = new MemKeyConversionIterator(getReader());
 +          else
 +            synchronized (parent) {
 +              // synchronize deep copy operation on parent; this prevents multiple threads from deep copying the rfile shared from parent. It's possible that the
 +              // thread deleting an InMemoryMap and a scan thread could be switching different deep copies
 +              iter = new MemKeyConversionIterator(parent.getReader().deepCopy(env));
 +            }
 +        }
 +      
 +      return iter;
 +    }
 +    
 +    @Override
 +    public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
-       return new MemoryDataSource(parent == null ? this : parent, switched, env);
++      return new MemoryDataSource(parent == null ? this : parent, switched, env, iflag);
++    }
++
++    @Override
++    public void setInterruptFlag(AtomicBoolean flag) {
++      this.iflag = flag;
 +    }
 +    
 +  }
 +  
 +  class MemoryIterator extends WrappingIterator implements InterruptibleIterator {
 +    
 +    private AtomicBoolean closed;
 +    private SourceSwitchingIterator ssi;
 +    private MemoryDataSource mds;
 +    
 +    protected SortedKeyValueIterator<Key,Value> getSource() {
 +      if (closed.get())
 +        throw new IllegalStateException("Memory iterator is closed");
 +      return super.getSource();
 +    }
 +    
 +    private MemoryIterator(InterruptibleIterator source) {
 +      this(source, new AtomicBoolean(false));
 +    }
 +    
 +    private MemoryIterator(SortedKeyValueIterator<Key,Value> source, AtomicBoolean closed) {
 +      setSource(source);
 +      this.closed = closed;
 +    }
 +    
 +    public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
 +      return new MemoryIterator(getSource().deepCopy(env), closed);
 +    }
 +    
 +    public void close() {
 +      
 +      synchronized (this) {
 +        if (closed.compareAndSet(false, true)) {
 +          try {
 +            if (mds.reader != null)
 +              mds.reader.close();
 +          } catch (IOException e) {
 +            log.warn(e, e);
 +          }
 +        }
 +      }
 +      
 +      // remove outside of sync to avoid deadlock
 +      activeIters.remove(this);
 +    }
 +    
 +    private synchronized boolean switchNow() throws IOException {
 +      if (closed.get())
 +        return false;
 +      
 +      ssi.switchNow();
 +      return true;
 +    }
 +    
 +    @Override
 +    public void setInterruptFlag(AtomicBoolean flag) {
 +      ((InterruptibleIterator) getSource()).setInterruptFlag(flag);
 +    }
 +    
 +    private void setSSI(SourceSwitchingIterator ssi) {
 +      this.ssi = ssi;
 +    }
 +    
 +    public void setMDS(MemoryDataSource mds) {
 +      this.mds = mds;
 +    }
 +    
 +  }
 +  
 +  public synchronized MemoryIterator skvIterator() {
 +    if (map == null)
 +      throw new NullPointerException();
 +    
 +    if (deleted)
 +      throw new IllegalStateException("Can not obtain iterator after map deleted");
 +    
 +    int mc = kvCount.get();
 +    MemoryDataSource mds = new MemoryDataSource();
 +    SourceSwitchingIterator ssi = new SourceSwitchingIterator(new MemoryDataSource());
 +    MemoryIterator mi = new MemoryIterator(new PartialMutationSkippingIterator(ssi, mc));
 +    mi.setSSI(ssi);
 +    mi.setMDS(mds);
 +    activeIters.add(mi);
 +    return mi;
 +  }
 +  
 +  public SortedKeyValueIterator<Key,Value> compactionIterator() {
 +    
 +    if (nextKVCount.get() - 1 != kvCount.get())
 +      throw new IllegalStateException("Memory map in unexpected state : nextKVCount = " + nextKVCount.get() + " kvCount = "
 +          + kvCount.get());
 +    
 +    return map.skvIterator();
 +  }
 +  
 +  private boolean deleted = false;
 +  
 +  public void delete(long waitTime) {
 +    
 +    synchronized (this) {
 +      if (deleted)
 +        throw new IllegalStateException("Double delete");
 +      
 +      deleted = true;
 +    }
 +    
 +    long t1 = System.currentTimeMillis();
 +    
 +    while (activeIters.size() > 0 && System.currentTimeMillis() - t1 < waitTime) {
 +      UtilWaitThread.sleep(50);
 +    }
 +    
 +    if (activeIters.size() > 0) {
 +      // dump memmap exactly as is to a tmp file on disk, and switch scans to that temp file
 +      try {
 +        Configuration conf = CachedConfiguration.getInstance();
 +        FileSystem fs = TraceFileSystem.wrap(FileSystem.getLocal(conf));
 +        
 +        String tmpFile = memDumpDir + "/memDump" + UUID.randomUUID() + "." + RFile.EXTENSION;
 +        
 +        Configuration newConf = new Configuration(conf);
 +        newConf.setInt("io.seqfile.compress.blocksize", 100000);
 +        
 +        FileSKVWriter out = new RFileOperations().openWriter(tmpFile, fs, newConf, ServerConfiguration.getSiteConfiguration());
 +        
 +        InterruptibleIterator iter = map.skvIterator();
 +       
 +        HashSet<ByteSequence> allfams= new HashSet<ByteSequence>();
 +        
 +        for(Entry<String, Set<ByteSequence>> entry : lggroups.entrySet()){
 +          allfams.addAll(entry.getValue());
 +          out.startNewLocalityGroup(entry.getKey(), entry.getValue());
 +          iter.seek(new Range(), entry.getValue(), true);
 +          dumpLocalityGroup(out, iter);
 +        }
 +        
 +        out.startDefaultLocalityGroup();
 +        iter.seek(new Range(), allfams, false);
 +       
 +        dumpLocalityGroup(out, iter);
 +        
 +        out.close();
 +        
 +        log.debug("Created mem dump file " + tmpFile);
 +        
 +        memDumpFile = tmpFile;
 +        
 +        synchronized (activeIters) {
 +          for (MemoryIterator mi : activeIters) {
 +            mi.switchNow();
 +          }
 +        }
 +        
 +        // rely on unix behavior that file will be deleted when last
 +        // reader closes it
 +        fs.delete(new Path(memDumpFile), true);
 +        
 +      } catch (IOException ioe) {
 +        log.error("Failed to create mem dump file ", ioe);
 +        
 +        while (activeIters.size() > 0) {
 +          UtilWaitThread.sleep(100);
 +        }
 +      }
 +      
 +    }
 +    
 +    SimpleMap tmpMap = map;
 +    
 +    synchronized (this) {
 +      map = null;
 +    }
 +    
 +    tmpMap.delete();
 +  }
 +
 +  private void dumpLocalityGroup(FileSKVWriter out, InterruptibleIterator iter) throws IOException {
 +    while (iter.hasTop() && activeIters.size() > 0) {
 +      // RFile does not support MemKey, so we move the kv count into the value only for the RFile.
 +      // There is no need to change the MemKey to a normal key because the kvCount info gets lost when it is written
 +      Value newValue = new MemValue(iter.getTopValue(), ((MemKey) iter.getTopKey()).kvCount);
 +      out.append(iter.getTopKey(), newValue);
 +      iter.next();
 +
 +    }
 +  }
 +}


[08/10] Merge branch '1.5.2-SNAPSHOT' into 1.6.1-SNAPSHOT

Posted by el...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/d54e0fd8/server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
index f9fdacb,0000000..703d1ae
mode 100644,000000..100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/Tablet.java
@@@ -1,3833 -1,0 +1,3838 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.tserver;
 +
 +import java.io.ByteArrayInputStream;
 +import java.io.DataInputStream;
 +import java.io.FileNotFoundException;
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.Arrays;
 +import java.util.Collection;
 +import java.util.Collections;
 +import java.util.Comparator;
 +import java.util.EnumSet;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.Iterator;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Map.Entry;
 +import java.util.PriorityQueue;
 +import java.util.Set;
 +import java.util.SortedMap;
 +import java.util.TreeMap;
 +import java.util.TreeSet;
 +import java.util.concurrent.atomic.AtomicBoolean;
 +import java.util.concurrent.atomic.AtomicLong;
 +import java.util.concurrent.atomic.AtomicReference;
 +import java.util.concurrent.locks.ReentrantLock;
 +
 +import org.apache.accumulo.core.Constants;
 +import org.apache.accumulo.core.client.Connector;
 +import org.apache.accumulo.core.client.IteratorSetting;
 +import org.apache.accumulo.core.client.impl.ScannerImpl;
 +import org.apache.accumulo.core.conf.AccumuloConfiguration;
 +import org.apache.accumulo.core.conf.ConfigurationCopy;
 +import org.apache.accumulo.core.conf.ConfigurationObserver;
 +import org.apache.accumulo.core.conf.Property;
 +import org.apache.accumulo.core.constraints.Violations;
 +import org.apache.accumulo.core.data.ByteSequence;
 +import org.apache.accumulo.core.data.Column;
 +import org.apache.accumulo.core.data.ColumnUpdate;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.KeyExtent;
 +import org.apache.accumulo.core.data.KeyValue;
 +import org.apache.accumulo.core.data.Mutation;
 +import org.apache.accumulo.core.data.Range;
 +import org.apache.accumulo.core.data.Value;
 +import org.apache.accumulo.core.data.thrift.IterInfo;
 +import org.apache.accumulo.core.data.thrift.MapFileInfo;
 +import org.apache.accumulo.core.file.FileOperations;
 +import org.apache.accumulo.core.file.FileSKVIterator;
 +import org.apache.accumulo.core.iterators.IterationInterruptedException;
 +import org.apache.accumulo.core.iterators.IteratorEnvironment;
 +import org.apache.accumulo.core.iterators.IteratorUtil;
 +import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
 +import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 +import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
 +import org.apache.accumulo.core.iterators.system.ColumnQualifierFilter;
 +import org.apache.accumulo.core.iterators.system.DeletingIterator;
 +import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
 +import org.apache.accumulo.core.iterators.system.MultiIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
 +import org.apache.accumulo.core.iterators.system.StatsIterator;
 +import org.apache.accumulo.core.iterators.system.VisibilityFilter;
 +import org.apache.accumulo.core.master.thrift.TabletLoadState;
 +import org.apache.accumulo.core.metadata.MetadataTable;
 +import org.apache.accumulo.core.metadata.RootTable;
 +import org.apache.accumulo.core.metadata.schema.DataFileValue;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily;
 +import org.apache.accumulo.core.security.Authorizations;
 +import org.apache.accumulo.core.security.ColumnVisibility;
 +import org.apache.accumulo.core.security.Credentials;
 +import org.apache.accumulo.core.tabletserver.log.LogEntry;
 +import org.apache.accumulo.core.util.CachedConfiguration;
 +import org.apache.accumulo.core.util.LocalityGroupUtil;
 +import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
 +import org.apache.accumulo.core.util.MapCounter;
 +import org.apache.accumulo.core.util.Pair;
 +import org.apache.accumulo.core.util.UtilWaitThread;
 +import org.apache.accumulo.fate.zookeeper.IZooReaderWriter;
 +import org.apache.accumulo.server.ServerConstants;
 +import org.apache.accumulo.server.client.HdfsZooInstance;
 +import org.apache.accumulo.server.conf.TableConfiguration;
 +import org.apache.accumulo.server.fs.FileRef;
 +import org.apache.accumulo.server.fs.VolumeManager;
 +import org.apache.accumulo.server.fs.VolumeManager.FileType;
 +import org.apache.accumulo.server.fs.VolumeManagerImpl;
 +import org.apache.accumulo.server.fs.VolumeUtil;
 +import org.apache.accumulo.server.fs.VolumeUtil.TabletFiles;
 +import org.apache.accumulo.server.master.state.TServerInstance;
 +import org.apache.accumulo.server.master.tableOps.CompactionIterators;
 +import org.apache.accumulo.server.problems.ProblemReport;
 +import org.apache.accumulo.server.problems.ProblemReports;
 +import org.apache.accumulo.server.problems.ProblemType;
 +import org.apache.accumulo.server.security.SystemCredentials;
 +import org.apache.accumulo.server.tablets.TabletTime;
 +import org.apache.accumulo.server.tablets.UniqueNameAllocator;
 +import org.apache.accumulo.server.util.FileUtil;
 +import org.apache.accumulo.server.util.MasterMetadataUtil;
 +import org.apache.accumulo.server.util.MetadataTableUtil;
 +import org.apache.accumulo.server.util.TabletOperations;
 +import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
 +import org.apache.accumulo.start.classloader.vfs.AccumuloVFSClassLoader;
 +import org.apache.accumulo.trace.instrument.Span;
 +import org.apache.accumulo.trace.instrument.Trace;
 +import org.apache.accumulo.tserver.Compactor.CompactionCanceledException;
 +import org.apache.accumulo.tserver.Compactor.CompactionEnv;
 +import org.apache.accumulo.tserver.FileManager.ScanFileManager;
 +import org.apache.accumulo.tserver.InMemoryMap.MemoryIterator;
 +import org.apache.accumulo.tserver.TabletServer.TservConstraintEnv;
 +import org.apache.accumulo.tserver.TabletServerResourceManager.TabletResourceManager;
 +import org.apache.accumulo.tserver.TabletStatsKeeper.Operation;
 +import org.apache.accumulo.tserver.compaction.CompactionPlan;
 +import org.apache.accumulo.tserver.compaction.CompactionStrategy;
 +import org.apache.accumulo.tserver.compaction.DefaultCompactionStrategy;
 +import org.apache.accumulo.tserver.compaction.MajorCompactionReason;
 +import org.apache.accumulo.tserver.compaction.MajorCompactionRequest;
 +import org.apache.accumulo.tserver.compaction.WriteParameters;
 +import org.apache.accumulo.tserver.constraints.ConstraintChecker;
 +import org.apache.accumulo.tserver.log.DfsLogger;
 +import org.apache.accumulo.tserver.log.MutationReceiver;
 +import org.apache.accumulo.tserver.mastermessage.TabletStatusMessage;
 +import org.apache.accumulo.tserver.metrics.TabletServerMinCMetrics;
 +import org.apache.commons.codec.DecoderException;
 +import org.apache.commons.codec.binary.Hex;
 +import org.apache.hadoop.conf.Configuration;
 +import org.apache.hadoop.fs.FileStatus;
 +import org.apache.hadoop.fs.FileSystem;
 +import org.apache.hadoop.fs.Path;
 +import org.apache.hadoop.io.Text;
 +import org.apache.log4j.Logger;
 +import org.apache.zookeeper.KeeperException;
 +import org.apache.zookeeper.KeeperException.NoNodeException;
 +
 +import com.google.common.annotations.VisibleForTesting;
 +
 +/*
 + * We need to be able to have the master tell a tabletServer to
 + * close this file, and the tablet server to handle all pending client reads
 + * before closing
 + * 
 + */
 +
 +/**
 + * 
 + * this class just provides an interface to read from a MapFile mostly takes care of reporting start and end keys
 + * 
 + * need this because a single row extent can have multiple columns this manages all the columns (each handled by a store) for a single row-extent
 + * 
 + * 
 + */
 +
 +public class Tablet {
 +
 +  enum MinorCompactionReason {
 +    USER, SYSTEM, CLOSE, RECOVERY
 +  }
 +
 +  public class CommitSession {
 +
 +    private int seq;
 +    private InMemoryMap memTable;
 +    private int commitsInProgress;
 +    private long maxCommittedTime = Long.MIN_VALUE;
 +
 +    private CommitSession(int seq, InMemoryMap imm) {
 +      this.seq = seq;
 +      this.memTable = imm;
 +      commitsInProgress = 0;
 +    }
 +
 +    public int getWALogSeq() {
 +      return seq;
 +    }
 +
 +    private void decrementCommitsInProgress() {
 +      if (commitsInProgress < 1)
 +        throw new IllegalStateException("commitsInProgress = " + commitsInProgress);
 +
 +      commitsInProgress--;
 +      if (commitsInProgress == 0)
 +        Tablet.this.notifyAll();
 +    }
 +
 +    private void incrementCommitsInProgress() {
 +      if (commitsInProgress < 0)
 +        throw new IllegalStateException("commitsInProgress = " + commitsInProgress);
 +
 +      commitsInProgress++;
 +    }
 +
 +    private void waitForCommitsToFinish() {
 +      while (commitsInProgress > 0) {
 +        try {
 +          Tablet.this.wait(50);
 +        } catch (InterruptedException e) {
 +          log.warn(e, e);
 +        }
 +      }
 +    }
 +
 +    public void abortCommit(List<Mutation> value) {
 +      Tablet.this.abortCommit(this, value);
 +    }
 +
 +    public void commit(List<Mutation> mutations) {
 +      Tablet.this.commit(this, mutations);
 +    }
 +
 +    public Tablet getTablet() {
 +      return Tablet.this;
 +    }
 +
 +    public boolean beginUpdatingLogsUsed(ArrayList<DfsLogger> copy, boolean mincFinish) {
 +      return Tablet.this.beginUpdatingLogsUsed(memTable, copy, mincFinish);
 +    }
 +
 +    public void finishUpdatingLogsUsed() {
 +      Tablet.this.finishUpdatingLogsUsed();
 +    }
 +
 +    public int getLogId() {
 +      return logId;
 +    }
 +
 +    public KeyExtent getExtent() {
 +      return extent;
 +    }
 +
 +    private void updateMaxCommittedTime(long time) {
 +      maxCommittedTime = Math.max(time, maxCommittedTime);
 +    }
 +
 +    private long getMaxCommittedTime() {
 +      if (maxCommittedTime == Long.MIN_VALUE)
 +        throw new IllegalStateException("Tried to read max committed time when it was never set");
 +      return maxCommittedTime;
 +    }
 +
 +  }
 +
 +  private class TabletMemory {
 +    private InMemoryMap memTable;
 +    private InMemoryMap otherMemTable;
 +    private InMemoryMap deletingMemTable;
 +    private int nextSeq = 1;
 +    private CommitSession commitSession;
 +
 +    TabletMemory() {
 +      try {
 +        memTable = new InMemoryMap(acuTableConf);
 +      } catch (LocalityGroupConfigurationError e) {
 +        throw new RuntimeException(e);
 +      }
 +      commitSession = new CommitSession(nextSeq, memTable);
 +      nextSeq += 2;
 +    }
 +
 +    InMemoryMap getMemTable() {
 +      return memTable;
 +    }
 +
 +    InMemoryMap getMinCMemTable() {
 +      return otherMemTable;
 +    }
 +
 +    CommitSession prepareForMinC() {
 +      if (otherMemTable != null) {
 +        throw new IllegalStateException();
 +      }
 +
 +      if (deletingMemTable != null) {
 +        throw new IllegalStateException();
 +      }
 +
 +      otherMemTable = memTable;
 +      try {
 +        memTable = new InMemoryMap(acuTableConf);
 +      } catch (LocalityGroupConfigurationError e) {
 +        throw new RuntimeException(e);
 +      }
 +
 +      CommitSession oldCommitSession = commitSession;
 +      commitSession = new CommitSession(nextSeq, memTable);
 +      nextSeq += 2;
 +
 +      tabletResources.updateMemoryUsageStats(memTable.estimatedSizeInBytes(), otherMemTable.estimatedSizeInBytes());
 +
 +      return oldCommitSession;
 +    }
 +
 +    void finishedMinC() {
 +
 +      if (otherMemTable == null) {
 +        throw new IllegalStateException();
 +      }
 +
 +      if (deletingMemTable != null) {
 +        throw new IllegalStateException();
 +      }
 +
 +      deletingMemTable = otherMemTable;
 +
 +      otherMemTable = null;
 +      Tablet.this.notifyAll();
 +    }
 +
 +    void finalizeMinC() {
 +      try {
 +        deletingMemTable.delete(15000);
 +      } finally {
 +        synchronized (Tablet.this) {
 +          if (otherMemTable != null) {
 +            throw new IllegalStateException();
 +          }
 +
 +          if (deletingMemTable == null) {
 +            throw new IllegalStateException();
 +          }
 +
 +          deletingMemTable = null;
 +
 +          tabletResources.updateMemoryUsageStats(memTable.estimatedSizeInBytes(), 0);
 +        }
 +      }
 +    }
 +
 +    boolean memoryReservedForMinC() {
 +      return otherMemTable != null || deletingMemTable != null;
 +    }
 +
 +    void waitForMinC() {
 +      while (otherMemTable != null || deletingMemTable != null) {
 +        try {
 +          Tablet.this.wait(50);
 +        } catch (InterruptedException e) {
 +          log.warn(e, e);
 +        }
 +      }
 +    }
 +
 +    void mutate(CommitSession cm, List<Mutation> mutations) {
 +      cm.memTable.mutate(mutations);
 +    }
 +
 +    void updateMemoryUsageStats() {
 +      long other = 0;
 +      if (otherMemTable != null)
 +        other = otherMemTable.estimatedSizeInBytes();
 +      else if (deletingMemTable != null)
 +        other = deletingMemTable.estimatedSizeInBytes();
 +
 +      tabletResources.updateMemoryUsageStats(memTable.estimatedSizeInBytes(), other);
 +    }
 +
 +    List<MemoryIterator> getIterators() {
 +      List<MemoryIterator> toReturn = new ArrayList<MemoryIterator>(2);
 +      toReturn.add(memTable.skvIterator());
 +      if (otherMemTable != null)
 +        toReturn.add(otherMemTable.skvIterator());
 +      return toReturn;
 +    }
 +
 +    void returnIterators(List<MemoryIterator> iters) {
 +      for (MemoryIterator iter : iters) {
 +        iter.close();
 +      }
 +    }
 +
 +    public long getNumEntries() {
 +      if (otherMemTable != null)
 +        return memTable.getNumEntries() + otherMemTable.getNumEntries();
 +      return memTable.getNumEntries();
 +    }
 +
 +    CommitSession getCommitSession() {
 +      return commitSession;
 +    }
 +  }
 +
 +  private TabletMemory tabletMemory;
 +
 +  private final TabletTime tabletTime;
 +  private long persistedTime;
 +  private final Object timeLock = new Object();
 +
 +  private final Path location; // absolute path of this tablets dir
 +  private TServerInstance lastLocation;
 +
 +  private Configuration conf;
 +  private VolumeManager fs;
 +
 +  private TableConfiguration acuTableConf;
 +
 +  private volatile boolean tableDirChecked = false;
 +
 +  private AtomicLong dataSourceDeletions = new AtomicLong(0);
 +  private Set<ScanDataSource> activeScans = new HashSet<ScanDataSource>();
 +
 +  private volatile boolean closing = false;
 +  private boolean closed = false;
 +  private boolean closeComplete = false;
 +
 +  private long lastFlushID = -1;
 +  private long lastCompactID = -1;
 +
 +  private KeyExtent extent;
 +
 +  private TabletResourceManager tabletResources;
 +  final private DatafileManager datafileManager;
 +  private volatile boolean majorCompactionInProgress = false;
 +  private volatile boolean majorCompactionWaitingToStart = false;
 +  private Set<MajorCompactionReason> majorCompactionQueued = Collections.synchronizedSet(EnumSet.noneOf(MajorCompactionReason.class));
 +  private volatile boolean minorCompactionInProgress = false;
 +  private volatile boolean minorCompactionWaitingToStart = false;
 +
 +  private boolean updatingFlushID = false;
 +
 +  private AtomicReference<ConstraintChecker> constraintChecker = new AtomicReference<ConstraintChecker>();
 +
 +  private final String tabletDirectory;
 +
 +  private int writesInProgress = 0;
 +
 +  private static final Logger log = Logger.getLogger(Tablet.class);
 +  public TabletStatsKeeper timer;
 +
 +  private Rate queryRate = new Rate(0.2);
 +  private long queryCount = 0;
 +
 +  private Rate queryByteRate = new Rate(0.2);
 +  private long queryBytes = 0;
 +
 +  private Rate ingestRate = new Rate(0.2);
 +  private long ingestCount = 0;
 +
 +  private Rate ingestByteRate = new Rate(0.2);
 +  private long ingestBytes = 0;
 +
 +  private byte[] defaultSecurityLabel = new byte[0];
 +
 +  private long lastMinorCompactionFinishTime;
 +  private long lastMapFileImportTime;
 +
 +  private volatile long numEntries;
 +  private volatile long numEntriesInMemory;
 +
 +  // a count of the amount of data read by the iterators
 +  private AtomicLong scannedCount = new AtomicLong(0);
 +  private Rate scannedRate = new Rate(0.2);
 +
 +  private ConfigurationObserver configObserver;
 +
 +  private TabletServer tabletServer;
 +
 +  private final int logId;
 +  // ensure we only have one reader/writer of our bulk file notes at at time
 +  public final Object bulkFileImportLock = new Object();
 +
 +  public int getLogId() {
 +    return logId;
 +  }
 +
 +  public static class TabletClosedException extends RuntimeException {
 +    public TabletClosedException(Exception e) {
 +      super(e);
 +    }
 +
 +    public TabletClosedException() {
 +      super();
 +    }
 +
 +    private static final long serialVersionUID = 1L;
 +  }
 +
 +  FileRef getNextMapFilename(String prefix) throws IOException {
 +    String extension = FileOperations.getNewFileExtension(tabletServer.getTableConfiguration(extent));
 +    checkTabletDir();
 +    return new FileRef(location.toString() + "/" + prefix + UniqueNameAllocator.getInstance().getNextName() + "." + extension);
 +  }
 +
 +  private void checkTabletDir() throws IOException {
 +    if (!tableDirChecked) {
 +      checkTabletDir(this.location);
 +      tableDirChecked = true;
 +    }
 +  }
 +
 +  private void checkTabletDir(Path tabletDir) throws IOException {
 +
 +    FileStatus[] files = null;
 +    try {
 +      files = fs.listStatus(tabletDir);
 +    } catch (FileNotFoundException ex) {
 +      // ignored
 +    }
 +
 +    if (files == null) {
 +      if (tabletDir.getName().startsWith("c-"))
 +        log.debug("Tablet " + extent + " had no dir, creating " + tabletDir); // its a clone dir...
 +      else
 +        log.warn("Tablet " + extent + " had no dir, creating " + tabletDir);
 +
 +      fs.mkdirs(tabletDir);
 +    }
 +  }
 +
 +  class DatafileManager {
 +    // access to datafilesizes needs to be synchronized: see CompactionRunner#getNumFiles
 +    final private Map<FileRef,DataFileValue> datafileSizes = Collections.synchronizedMap(new TreeMap<FileRef,DataFileValue>());
 +
 +    DatafileManager(SortedMap<FileRef,DataFileValue> datafileSizes) {
 +      for (Entry<FileRef,DataFileValue> datafiles : datafileSizes.entrySet())
 +        this.datafileSizes.put(datafiles.getKey(), datafiles.getValue());
 +    }
 +
 +    FileRef mergingMinorCompactionFile = null;
 +    Set<FileRef> filesToDeleteAfterScan = new HashSet<FileRef>();
 +    Map<Long,Set<FileRef>> scanFileReservations = new HashMap<Long,Set<FileRef>>();
 +    MapCounter<FileRef> fileScanReferenceCounts = new MapCounter<FileRef>();
 +    long nextScanReservationId = 0;
 +    boolean reservationsBlocked = false;
 +
 +    Set<FileRef> majorCompactingFiles = new HashSet<FileRef>();
 +
 +    Pair<Long,Map<FileRef,DataFileValue>> reserveFilesForScan() {
 +      synchronized (Tablet.this) {
 +
 +        while (reservationsBlocked) {
 +          try {
 +            Tablet.this.wait(50);
 +          } catch (InterruptedException e) {
 +            log.warn(e, e);
 +          }
 +        }
 +
 +        Set<FileRef> absFilePaths = new HashSet<FileRef>(datafileSizes.keySet());
 +
 +        long rid = nextScanReservationId++;
 +
 +        scanFileReservations.put(rid, absFilePaths);
 +
 +        Map<FileRef,DataFileValue> ret = new HashMap<FileRef,DataFileValue>();
 +
 +        for (FileRef path : absFilePaths) {
 +          fileScanReferenceCounts.increment(path, 1);
 +          ret.put(path, datafileSizes.get(path));
 +        }
 +
 +        return new Pair<Long,Map<FileRef,DataFileValue>>(rid, ret);
 +      }
 +    }
 +
 +    void returnFilesForScan(Long reservationId) {
 +
 +      final Set<FileRef> filesToDelete = new HashSet<FileRef>();
 +
 +      synchronized (Tablet.this) {
 +        Set<FileRef> absFilePaths = scanFileReservations.remove(reservationId);
 +
 +        if (absFilePaths == null)
 +          throw new IllegalArgumentException("Unknown scan reservation id " + reservationId);
 +
 +        boolean notify = false;
 +        for (FileRef path : absFilePaths) {
 +          long refCount = fileScanReferenceCounts.decrement(path, 1);
 +          if (refCount == 0) {
 +            if (filesToDeleteAfterScan.remove(path))
 +              filesToDelete.add(path);
 +            notify = true;
 +          } else if (refCount < 0)
 +            throw new IllegalStateException("Scan ref count for " + path + " is " + refCount);
 +        }
 +
 +        if (notify)
 +          Tablet.this.notifyAll();
 +      }
 +
 +      if (filesToDelete.size() > 0) {
 +        log.debug("Removing scan refs from metadata " + extent + " " + filesToDelete);
 +        MetadataTableUtil.removeScanFiles(extent, filesToDelete, SystemCredentials.get(), tabletServer.getLock());
 +      }
 +    }
 +
 +    private void removeFilesAfterScan(Set<FileRef> scanFiles) {
 +      if (scanFiles.size() == 0)
 +        return;
 +
 +      Set<FileRef> filesToDelete = new HashSet<FileRef>();
 +
 +      synchronized (Tablet.this) {
 +        for (FileRef path : scanFiles) {
 +          if (fileScanReferenceCounts.get(path) == 0)
 +            filesToDelete.add(path);
 +          else
 +            filesToDeleteAfterScan.add(path);
 +        }
 +      }
 +
 +      if (filesToDelete.size() > 0) {
 +        log.debug("Removing scan refs from metadata " + extent + " " + filesToDelete);
 +        MetadataTableUtil.removeScanFiles(extent, filesToDelete, SystemCredentials.get(), tabletServer.getLock());
 +      }
 +    }
 +
 +    private TreeSet<FileRef> waitForScansToFinish(Set<FileRef> pathsToWaitFor, boolean blockNewScans, long maxWaitTime) {
 +      long startTime = System.currentTimeMillis();
 +      TreeSet<FileRef> inUse = new TreeSet<FileRef>();
 +
 +      Span waitForScans = Trace.start("waitForScans");
 +      try {
 +        synchronized (Tablet.this) {
 +          if (blockNewScans) {
 +            if (reservationsBlocked)
 +              throw new IllegalStateException();
 +
 +            reservationsBlocked = true;
 +          }
 +
 +          for (FileRef path : pathsToWaitFor) {
 +            while (fileScanReferenceCounts.get(path) > 0 && System.currentTimeMillis() - startTime < maxWaitTime) {
 +              try {
 +                Tablet.this.wait(100);
 +              } catch (InterruptedException e) {
 +                log.warn(e, e);
 +              }
 +            }
 +          }
 +
 +          for (FileRef path : pathsToWaitFor) {
 +            if (fileScanReferenceCounts.get(path) > 0)
 +              inUse.add(path);
 +          }
 +
 +          if (blockNewScans) {
 +            reservationsBlocked = false;
 +            Tablet.this.notifyAll();
 +          }
 +
 +        }
 +      } finally {
 +        waitForScans.stop();
 +      }
 +      return inUse;
 +    }
 +
 +    public void importMapFiles(long tid, Map<FileRef,DataFileValue> pathsString, boolean setTime) throws IOException {
 +
 +      String bulkDir = null;
 +
 +      Map<FileRef,DataFileValue> paths = new HashMap<FileRef,DataFileValue>();
 +      for (Entry<FileRef,DataFileValue> entry : pathsString.entrySet())
 +        paths.put(entry.getKey(), entry.getValue());
 +
 +      for (FileRef tpath : paths.keySet()) {
 +
 +        boolean inTheRightDirectory = false;
 +        Path parent = tpath.path().getParent().getParent();
 +        for (String tablesDir : ServerConstants.getTablesDirs()) {
 +          if (parent.equals(new Path(tablesDir, extent.getTableId().toString()))) {
 +            inTheRightDirectory = true;
 +            break;
 +          }
 +        }
 +        if (!inTheRightDirectory) {
 +          throw new IOException("Data file " + tpath + " not in table dirs");
 +        }
 +
 +        if (bulkDir == null)
 +          bulkDir = tpath.path().getParent().toString();
 +        else if (!bulkDir.equals(tpath.path().getParent().toString()))
 +          throw new IllegalArgumentException("bulk files in different dirs " + bulkDir + " " + tpath);
 +
 +      }
 +
 +      if (extent.isRootTablet()) {
 +        throw new IllegalArgumentException("Can not import files to root tablet");
 +      }
 +
 +      synchronized (bulkFileImportLock) {
 +        Credentials creds = SystemCredentials.get();
 +        Connector conn;
 +        try {
 +          conn = HdfsZooInstance.getInstance().getConnector(creds.getPrincipal(), creds.getToken());
 +        } catch (Exception ex) {
 +          throw new IOException(ex);
 +        }
 +        // Remove any bulk files we've previously loaded and compacted away
 +        List<FileRef> files = MetadataTableUtil.getBulkFilesLoaded(conn, extent, tid);
 +
 +        for (FileRef file : files)
 +          if (paths.keySet().remove(file))
 +            log.debug("Ignoring request to re-import a file already imported: " + extent + ": " + file);
 +
 +        if (paths.size() > 0) {
 +          long bulkTime = Long.MIN_VALUE;
 +          if (setTime) {
 +            for (DataFileValue dfv : paths.values()) {
 +              long nextTime = tabletTime.getAndUpdateTime();
 +              if (nextTime < bulkTime)
 +                throw new IllegalStateException("Time went backwards unexpectedly " + nextTime + " " + bulkTime);
 +              bulkTime = nextTime;
 +              dfv.setTime(bulkTime);
 +            }
 +          }
 +
 +          synchronized (timeLock) {
 +            if (bulkTime > persistedTime)
 +              persistedTime = bulkTime;
 +
 +            MetadataTableUtil.updateTabletDataFile(tid, extent, paths, tabletTime.getMetadataValue(persistedTime), creds, tabletServer.getLock());
 +          }
 +        }
 +      }
 +
 +      synchronized (Tablet.this) {
 +        for (Entry<FileRef,DataFileValue> tpath : paths.entrySet()) {
 +          if (datafileSizes.containsKey(tpath.getKey())) {
 +            log.error("Adding file that is already in set " + tpath.getKey());
 +          }
 +          datafileSizes.put(tpath.getKey(), tpath.getValue());
 +
 +        }
 +
 +        tabletResources.importedMapFiles();
 +
 +        computeNumEntries();
 +      }
 +
 +      for (Entry<FileRef,DataFileValue> entry : paths.entrySet()) {
 +        log.log(TLevel.TABLET_HIST, extent + " import " + entry.getKey() + " " + entry.getValue());
 +      }
 +    }
 +
 +    FileRef reserveMergingMinorCompactionFile() {
 +      if (mergingMinorCompactionFile != null)
 +        throw new IllegalStateException("Tried to reserve merging minor compaction file when already reserved  : " + mergingMinorCompactionFile);
 +
 +      if (extent.isRootTablet())
 +        return null;
 +
 +      int maxFiles = acuTableConf.getMaxFilesPerTablet();
 +
 +      // when a major compaction is running and we are at max files, write out
 +      // one extra file... want to avoid the case where major compaction is
 +      // compacting everything except for the largest file, and therefore the
 +      // largest file is returned for merging.. the following check mostly
 +      // avoids this case, except for the case where major compactions fail or
 +      // are canceled
 +      if (majorCompactingFiles.size() > 0 && datafileSizes.size() == maxFiles)
 +        return null;
 +
 +      if (datafileSizes.size() >= maxFiles) {
 +        // find the smallest file
 +
 +        long min = Long.MAX_VALUE;
 +        FileRef minName = null;
 +
 +        for (Entry<FileRef,DataFileValue> entry : datafileSizes.entrySet()) {
 +          if (entry.getValue().getSize() < min && !majorCompactingFiles.contains(entry.getKey())) {
 +            min = entry.getValue().getSize();
 +            minName = entry.getKey();
 +          }
 +        }
 +
 +        if (minName == null)
 +          return null;
 +
 +        mergingMinorCompactionFile = minName;
 +        return minName;
 +      }
 +
 +      return null;
 +    }
 +
 +    void unreserveMergingMinorCompactionFile(FileRef file) {
 +      if ((file == null && mergingMinorCompactionFile != null) || (file != null && mergingMinorCompactionFile == null)
 +          || (file != null && mergingMinorCompactionFile != null && !file.equals(mergingMinorCompactionFile)))
 +        throw new IllegalStateException("Disagreement " + file + " " + mergingMinorCompactionFile);
 +
 +      mergingMinorCompactionFile = null;
 +    }
 +
 +    void bringMinorCompactionOnline(FileRef tmpDatafile, FileRef newDatafile, FileRef absMergeFile, DataFileValue dfv, CommitSession commitSession, long flushId)
 +        throws IOException {
 +
 +      IZooReaderWriter zoo = ZooReaderWriter.getRetryingInstance();
 +      if (extent.isRootTablet()) {
 +        try {
 +          if (!zoo.isLockHeld(tabletServer.getLock().getLockID())) {
 +            throw new IllegalStateException();
 +          }
 +        } catch (Exception e) {
 +          throw new IllegalStateException("Can not bring major compaction online, lock not held", e);
 +        }
 +      }
 +
 +      // rename before putting in metadata table, so files in metadata table should
 +      // always exist
 +      do {
 +        try {
 +          if (dfv.getNumEntries() == 0) {
 +            fs.deleteRecursively(tmpDatafile.path());
 +          } else {
 +            if (fs.exists(newDatafile.path())) {
 +              log.warn("Target map file already exist " + newDatafile);
 +              fs.deleteRecursively(newDatafile.path());
 +            }
 +            
 +            rename(fs, tmpDatafile.path(), newDatafile.path());
 +          }
 +          break;
 +        } catch (IOException ioe) {
 +          log.warn("Tablet " + extent + " failed to rename " + newDatafile + " after MinC, will retry in 60 secs...", ioe);
 +          UtilWaitThread.sleep(60 * 1000);
 +        }
 +      } while (true);
 +
 +      long t1, t2;
 +
 +      // the code below always assumes merged files are in use by scans... this must be done
 +      // because the in memory list of files is not updated until after the metadata table
 +      // therefore the file is available to scans until memory is updated, but want to ensure
 +      // the file is not available for garbage collection... if memory were updated
 +      // before this point (like major compactions do), then the following code could wait
 +      // for scans to finish like major compactions do.... used to wait for scans to finish
 +      // here, but that was incorrect because a scan could start after waiting but before
 +      // memory was updated... assuming the file is always in use by scans leads to
 +      // one uneeded metadata update when it was not actually in use
 +      Set<FileRef> filesInUseByScans = Collections.emptySet();
 +      if (absMergeFile != null)
 +        filesInUseByScans = Collections.singleton(absMergeFile);
 +
 +      // very important to write delete entries outside of log lock, because
 +      // this metadata write does not go up... it goes sideways or to itself
 +      if (absMergeFile != null)
 +        MetadataTableUtil.addDeleteEntries(extent, Collections.singleton(absMergeFile), SystemCredentials.get());
 +
 +      Set<String> unusedWalLogs = beginClearingUnusedLogs();
 +      try {
 +        // the order of writing to metadata and walog is important in the face of machine/process failures
 +        // need to write to metadata before writing to walog, when things are done in the reverse order
 +        // data could be lost... the minor compaction start even should be written before the following metadata
 +        // write is made
 +
 +        synchronized (timeLock) {
 +          if (commitSession.getMaxCommittedTime() > persistedTime)
 +            persistedTime = commitSession.getMaxCommittedTime();
 +
 +          String time = tabletTime.getMetadataValue(persistedTime);
 +          MasterMetadataUtil.updateTabletDataFile(extent, newDatafile, absMergeFile, dfv, time, SystemCredentials.get(), filesInUseByScans,
 +              tabletServer.getClientAddressString(), tabletServer.getLock(), unusedWalLogs, lastLocation, flushId);
 +        }
 +
 +      } finally {
 +        finishClearingUnusedLogs();
 +      }
 +
 +      do {
 +        try {
 +          // the purpose of making this update use the new commit session, instead of the old one passed in,
 +          // is because the new one will reference the logs used by current memory...
 +
 +          tabletServer.minorCompactionFinished(tabletMemory.getCommitSession(), newDatafile.toString(), commitSession.getWALogSeq() + 2);
 +          break;
 +        } catch (IOException e) {
 +          log.error("Failed to write to write-ahead log " + e.getMessage() + " will retry", e);
 +          UtilWaitThread.sleep(1 * 1000);
 +        }
 +      } while (true);
 +
 +      synchronized (Tablet.this) {
 +        lastLocation = null;
 +
 +        t1 = System.currentTimeMillis();
 +        if (datafileSizes.containsKey(newDatafile)) {
 +          log.error("Adding file that is already in set " + newDatafile);
 +        }
 +
 +        if (dfv.getNumEntries() > 0) {
 +          datafileSizes.put(newDatafile, dfv);
 +        }
 +
 +        if (absMergeFile != null) {
 +          datafileSizes.remove(absMergeFile);
 +        }
 +
 +        unreserveMergingMinorCompactionFile(absMergeFile);
 +
 +        dataSourceDeletions.incrementAndGet();
 +        tabletMemory.finishedMinC();
 +
 +        lastFlushID = flushId;
 +
 +        computeNumEntries();
 +        t2 = System.currentTimeMillis();
 +      }
 +
 +      // must do this after list of files in memory is updated above
 +      removeFilesAfterScan(filesInUseByScans);
 +
 +      if (absMergeFile != null)
 +        log.log(TLevel.TABLET_HIST, extent + " MinC [" + absMergeFile + ",memory] -> " + newDatafile);
 +      else
 +        log.log(TLevel.TABLET_HIST, extent + " MinC [memory] -> " + newDatafile);
 +      log.debug(String.format("MinC finish lock %.2f secs %s", (t2 - t1) / 1000.0, getExtent().toString()));
 +      if (dfv.getSize() > acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD)) {
 +        log.debug(String.format("Minor Compaction wrote out file larger than split threshold.  split threshold = %,d  file size = %,d",
 +            acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD), dfv.getSize()));
 +      }
 +
 +    }
 +
 +    /**
 +     * Reserves the given files for an in-progress major compaction. Only one reservation may be
 +     * outstanding at a time, and a file currently targeted by a merging minor compaction may not
 +     * be reserved.
 +     *
 +     * @throws IllegalStateException if files are already reserved or a file conflicts with the merging minor compaction file
 +     */
 +    public void reserveMajorCompactingFiles(Collection<FileRef> files) {
 +      if (majorCompactingFiles.size() != 0)
 +        throw new IllegalStateException("Major compacting files not empty " + majorCompactingFiles);
 +
 +      if (mergingMinorCompactionFile != null && files.contains(mergingMinorCompactionFile))
 +        throw new IllegalStateException("Major compaction tried to resrve file in use by minor compaction " + mergingMinorCompactionFile);
 +
 +      majorCompactingFiles.addAll(files);
 +    }
 +
 +    /** Releases the major compaction file reservation made by {@link #reserveMajorCompactingFiles}. */
 +    public void clearMajorCompactingFile() {
 +      majorCompactingFiles.clear();
 +    }
 +
 +    /**
 +     * Atomically swaps the old datafiles for the compacted output: renames the temp file, updates
 +     * the in-memory file set under the Tablet lock, and records the change in the metadata table.
 +     * The root tablet takes a special path — it waits for scans to finish, verifies the tablet
 +     * server's lock is still held, and replaces files via RootFiles instead of a metadata write.
 +     *
 +     * @param oldDatafiles files consumed by the major compaction
 +     * @param tmpDatafile compaction output written under a temporary name
 +     * @param newDatafile final name the output is renamed to
 +     * @param compactionId id recorded as the last compaction; may be null
 +     * @param dfv size and entry-count metadata for the new file
 +     */
 +    void bringMajorCompactionOnline(Set<FileRef> oldDatafiles, FileRef tmpDatafile, FileRef newDatafile, Long compactionId, DataFileValue dfv)
 +        throws IOException {
 +      long t1, t2;
 +
 +      if (!extent.isRootTablet()) {
 +
 +        if (fs.exists(newDatafile.path())) {
 +          log.error("Target map file already exist " + newDatafile, new Exception());
 +          throw new IllegalStateException("Target map file already exist " + newDatafile);
 +        }
 +
 +        // rename before putting in metadata table, so files in metadata table should
 +        // always exist
 +        rename(fs, tmpDatafile.path(), newDatafile.path());
 +
 +        // an empty compaction result is renamed first (for metadata consistency) then removed
 +        if (dfv.getNumEntries() == 0) {
 +          fs.deleteRecursively(newDatafile.path());
 +        }
 +      }
 +
 +      TServerInstance lastLocation = null;
 +      synchronized (Tablet.this) {
 +
 +        t1 = System.currentTimeMillis();
 +
 +        IZooReaderWriter zoo = ZooReaderWriter.getRetryingInstance();
 +
 +        dataSourceDeletions.incrementAndGet();
 +
 +        if (extent.isRootTablet()) {
 +
 +          waitForScansToFinish(oldDatafiles, true, Long.MAX_VALUE);
 +
 +          // refuse to swap root tablet files if this server no longer holds its lock
 +          try {
 +            if (!zoo.isLockHeld(tabletServer.getLock().getLockID())) {
 +              throw new IllegalStateException();
 +            }
 +          } catch (Exception e) {
 +            throw new IllegalStateException("Can not bring major compaction online, lock not held", e);
 +          }
 +
 +          // mark files as ready for deletion, but
 +          // do not delete them until we successfully
 +          // rename the compacted map file, in case
 +          // the system goes down
 +
 +          RootFiles.replaceFiles(acuTableConf, fs, location, oldDatafiles, tmpDatafile, newDatafile);
 +        }
 +
 +        // atomically remove old files and add new file
 +        for (FileRef oldDatafile : oldDatafiles) {
 +          if (!datafileSizes.containsKey(oldDatafile)) {
 +            log.error("file does not exist in set " + oldDatafile);
 +          }
 +          datafileSizes.remove(oldDatafile);
 +          majorCompactingFiles.remove(oldDatafile);
 +        }
 +
 +        if (datafileSizes.containsKey(newDatafile)) {
 +          log.error("Adding file that is already in set " + newDatafile);
 +        }
 +
 +        if (dfv.getNumEntries() > 0) {
 +          datafileSizes.put(newDatafile, dfv);
 +        }
 +
 +        // could be used by a follow on compaction in a multipass compaction
 +        majorCompactingFiles.add(newDatafile);
 +
 +        computeNumEntries();
 +
 +        lastLocation = Tablet.this.lastLocation;
 +        Tablet.this.lastLocation = null;
 +
 +        if (compactionId != null)
 +          lastCompactID = compactionId;
 +
 +        t2 = System.currentTimeMillis();
 +      }
 +
 +      if (!extent.isRootTablet()) {
 +        // files still referenced by active scans are recorded as scan refs so the GC keeps them
 +        Set<FileRef> filesInUseByScans = waitForScansToFinish(oldDatafiles, false, 10000);
 +        if (filesInUseByScans.size() > 0)
 +          log.debug("Adding scan refs to metadata " + extent + " " + filesInUseByScans);
 +        MasterMetadataUtil.replaceDatafiles(extent, oldDatafiles, filesInUseByScans, newDatafile, compactionId, dfv, SystemCredentials.get(),
 +            tabletServer.getClientAddressString(), lastLocation, tabletServer.getLock());
 +        removeFilesAfterScan(filesInUseByScans);
 +      }
 +
 +      log.debug(String.format("MajC finish lock %.2f secs", (t2 - t1) / 1000.0));
 +      log.log(TLevel.TABLET_HIST, extent + " MajC " + oldDatafiles + " --> " + newDatafile);
 +    }
 +
 +    /** Returns an unmodifiable snapshot of the current datafile-to-size map, taken under the Tablet lock. */
 +    public SortedMap<FileRef,DataFileValue> getDatafileSizes() {
 +      synchronized (Tablet.this) {
 +        TreeMap<FileRef,DataFileValue> copy = new TreeMap<FileRef,DataFileValue>(datafileSizes);
 +        return Collections.unmodifiableSortedMap(copy);
 +      }
 +    }
 +
 +    /** Returns an unmodifiable snapshot of the current set of datafiles, taken under the Tablet lock. */
 +    public Set<FileRef> getFiles() {
 +      synchronized (Tablet.this) {
 +        HashSet<FileRef> files = new HashSet<FileRef>(datafileSizes.keySet());
 +        return Collections.unmodifiableSet(files);
 +      }
 +    }
 +
 +  }
 +
 +  /** Creates a tablet from metadata-table key/values; not the result of a split (splitCreationTime is zeroed). */
 +  public Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm, SortedMap<Key,Value> tabletsKeyValues)
 +      throws IOException {
 +    this(tabletServer, location, extent, trm, CachedConfiguration.getInstance(), tabletsKeyValues);
 +    splitCreationTime = 0;
 +  }
 +
 +  /** Creates a tablet produced by a split, using the file/time info carried in {@code info}; records the split creation time. */
 +  public Tablet(KeyExtent extent, TabletServer tabletServer, TabletResourceManager trm, SplitInfo info) throws IOException {
 +    this(tabletServer, new Text(info.dir), extent, trm, CachedConfiguration.getInstance(), info.datafiles, info.time, info.initFlushID, info.initCompactID, info.lastLocation);
 +    splitCreationTime = System.currentTimeMillis();
 +  }
 +
 +  /**
 +   * Only visible for testing; wires in the given collaborators without any metadata or filesystem work.
 +   */
 +  @VisibleForTesting
 +  protected Tablet(TabletTime tabletTime, String tabletDirectory, int logId, Path location, DatafileManager datafileManager) {
 +    this.tabletTime = tabletTime;
 +    this.tabletDirectory = tabletDirectory;
 +    this.logId = logId;
 +    this.location = location;
 +    this.datafileManager = datafileManager; 
 +  }
 +
 +  // Convenience chain: supplies the default VolumeManager before delegating.
 +  private Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm, Configuration conf,
 +      SortedMap<Key,Value> tabletsKeyValues) throws IOException {
 +    this(tabletServer, location, extent, trm, conf, VolumeManagerImpl.get(), tabletsKeyValues);
 +  }
 +
 +  // Shared empty log-entry list for constructors that start with no write-ahead logs.
 +  static private final List<LogEntry> EMPTY = Collections.emptyList();
 +
 +  // Convenience chain used by the split path: known datafiles, no log entries, no scan files.
 +  private Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm, Configuration conf,
 +      SortedMap<FileRef,DataFileValue> datafiles, String time, long initFlushID, long initCompactID, TServerInstance last) throws IOException {
 +    this(tabletServer, location, extent, trm, conf, VolumeManagerImpl.get(), EMPTY, datafiles, time, last, new HashSet<FileRef>(), initFlushID, initCompactID);
 +  }
 +
 +  /**
 +   * Extracts this tablet's TIME column value from the supplied metadata entries.
 +   * Returns null for the root tablet (its time is derived from its files elsewhere) or when
 +   * exactly one TIME entry is not found.
 +   */
 +  private static String lookupTime(AccumuloConfiguration conf, KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues) {
 +    SortedMap<Key,Value> entries;
 +
 +    if (extent.isRootTablet()) {
 +      return null;
 +    } else {
 +      entries = new TreeMap<Key,Value>();
 +      Text rowName = extent.getMetadataEntry();
 +      for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +        if (entry.getKey().compareRow(rowName) == 0 && TabletsSection.ServerColumnFamily.TIME_COLUMN.hasColumns(entry.getKey())) {
 +          entries.put(new Key(entry.getKey()), new Value(entry.getValue()));
 +        }
 +      }
 +    }
 +
 +    // log.debug("extent : "+extent+"   entries : "+entries);
 +
 +    if (entries.size() == 1)
 +      return entries.values().iterator().next().toString();
 +    return null;
 +  }
 +
 +  /**
 +   * Builds the tablet's datafile map. For the root tablet this lists the root tablet directory
 +   * directly (after cleaning up replacement files) since it cannot read its own metadata; for all
 +   * other tablets it scans the appropriate metadata table row for DataFile entries.
 +   */
 +  private static SortedMap<FileRef,DataFileValue> lookupDatafiles(AccumuloConfiguration conf, VolumeManager fs, KeyExtent extent,
 +      SortedMap<Key,Value> tabletsKeyValues) throws IOException {
 +
 +    TreeMap<FileRef,DataFileValue> datafiles = new TreeMap<FileRef,DataFileValue>();
 +
 +    if (extent.isRootTablet()) { // the meta0 tablet
 +      Path location = new Path(MetadataTableUtil.getRootTabletDir());
 +
 +      // cleanUpFiles() has special handling for delete. files
 +      FileStatus[] files = fs.listStatus(location);
 +      Collection<String> goodPaths = RootFiles.cleanupReplacement(fs, files, true);
 +      for (String good : goodPaths) {
 +        Path path = new Path(good);
 +        String filename = path.getName();
 +        FileRef ref = new FileRef(location.toString() + "/" + filename, path);
 +        // sizes are unknown for root tablet files; zeros are placeholders
 +        DataFileValue dfv = new DataFileValue(0, 0);
 +        datafiles.put(ref, dfv);
 +      }
 +    } else {
 +
 +      Text rowName = extent.getMetadataEntry();
 +
 +      String tableId = extent.isMeta() ? RootTable.ID : MetadataTable.ID;
 +      ScannerImpl mdScanner = new ScannerImpl(HdfsZooInstance.getInstance(), SystemCredentials.get(), tableId, Authorizations.EMPTY);
 +
 +      // Commented out because when no data file is present, each tablet will scan through metadata table and return nothing
 +      // reduced batch size to improve performance
 +      // changed here after endKeys were implemented from 10 to 1000
 +      mdScanner.setBatchSize(1000);
 +
 +      // leave these in, again, now using endKey for safety
 +      mdScanner.fetchColumnFamily(DataFileColumnFamily.NAME);
 +
 +      mdScanner.setRange(new Range(rowName));
 +
 +      for (Entry<Key,Value> entry : mdScanner) {
 +
 +        if (entry.getKey().compareRow(rowName) != 0) {
 +          break;
 +        }
 +
 +        FileRef ref = new FileRef(fs, entry.getKey());
 +        datafiles.put(ref, new DataFileValue(entry.getValue().get()));
 +      }
 +    }
 +    return datafiles;
 +  }
 +
 +  /**
 +   * Collects the write-ahead log entries for this tablet. Metadata tablets query the metadata
 +   * utility directly; user tablets filter the supplied key/values for LogColumnFamily entries.
 +   *
 +   * @throws RuntimeException if metadata log entries cannot be read
 +   */
 +  private static List<LogEntry> lookupLogEntries(KeyExtent ke, SortedMap<Key,Value> tabletsKeyValues) {
 +    List<LogEntry> logEntries = new ArrayList<LogEntry>();
 +
 +    if (ke.isMeta()) {
 +      try {
 +        logEntries = MetadataTableUtil.getLogEntries(SystemCredentials.get(), ke);
 +      } catch (Exception ex) {
 +        throw new RuntimeException("Unable to read tablet log entries", ex);
 +      }
 +    } else {
 +      log.debug("Looking at metadata " + tabletsKeyValues);
 +      Text row = ke.getMetadataEntry();
 +      for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +        Key key = entry.getKey();
 +        if (key.getRow().equals(row)) {
 +          if (key.getColumnFamily().equals(LogColumnFamily.NAME)) {
 +            logEntries.add(LogEntry.fromKeyValue(key, entry.getValue()));
 +          }
 +        }
 +      }
 +    }
 +
 +    log.debug("got " + logEntries + " for logs for " + ke);
 +    return logEntries;
 +  }
 +
 +  /** Filters the supplied metadata key/values for this tablet's scan-file references. */
 +  private static Set<FileRef> lookupScanFiles(KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues, VolumeManager fs) throws IOException {
 +    HashSet<FileRef> scanFiles = new HashSet<FileRef>();
 +
 +    Text row = extent.getMetadataEntry();
 +    for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +      Key key = entry.getKey();
 +      if (key.getRow().equals(row) && key.getColumnFamily().equals(ScanFileColumnFamily.NAME)) {
 +        scanFiles.add(new FileRef(fs, key));
 +      }
 +    }
 +
 +    return scanFiles;
 +  }
 +
 +  /** Returns this tablet's FLUSH column value from the supplied metadata entries, or -1 if absent. */
 +  private static long lookupFlushID(KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues) {
 +    Text row = extent.getMetadataEntry();
 +    for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +      Key key = entry.getKey();
 +      if (key.getRow().equals(row) && TabletsSection.ServerColumnFamily.FLUSH_COLUMN.equals(key.getColumnFamily(), key.getColumnQualifier()))
 +        return Long.parseLong(entry.getValue().toString());
 +    }
 +
 +    return -1;
 +  }
 +
 +  /** Returns this tablet's COMPACT column value from the supplied metadata entries, or -1 if absent. */
 +  private static long lookupCompactID(KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues) {
 +    Text row = extent.getMetadataEntry();
 +    for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +      Key key = entry.getKey();
 +      if (key.getRow().equals(row) && TabletsSection.ServerColumnFamily.COMPACT_COLUMN.equals(key.getColumnFamily(), key.getColumnQualifier()))
 +        return Long.parseLong(entry.getValue().toString());
 +    }
 +
 +    return -1;
 +  }
 +
 +  // Resolves all tablet state (logs, datafiles, time, last server, scan files, flush/compact ids)
 +  // from the metadata key/values before delegating to the full constructor.
 +  private Tablet(TabletServer tabletServer, Text location, KeyExtent extent, TabletResourceManager trm, Configuration conf, VolumeManager fs,
 +      SortedMap<Key,Value> tabletsKeyValues) throws IOException {
 +    this(tabletServer, location, extent, trm, conf, fs, lookupLogEntries(extent, tabletsKeyValues), lookupDatafiles(tabletServer.getSystemConfiguration(), fs,
 +        extent, tabletsKeyValues), lookupTime(tabletServer.getSystemConfiguration(), extent, tabletsKeyValues), lookupLastServer(extent, tabletsKeyValues),
 +        lookupScanFiles(extent, tabletsKeyValues, fs), lookupFlushID(extent, tabletsKeyValues), lookupCompactID(extent, tabletsKeyValues));
 +  }
 +
 +  /** Returns the first last-location entry found in the supplied metadata key/values, or null if none. */
 +  private static TServerInstance lookupLastServer(KeyExtent extent, SortedMap<Key,Value> tabletsKeyValues) {
 +    for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
 +      if (entry.getKey().getColumnFamily().compareTo(TabletsSection.LastLocationColumnFamily.NAME) == 0) {
 +        return new TServerInstance(entry.getValue(), entry.getKey().getColumnQualifier());
 +      }
 +    }
 +    return null;
 +  }
 +
 +  /**
 +   * yet another constructor - this one allows us to avoid costly lookups into the Metadata table if we already know the files we need - as at split time
 +   *
 +   * Responsibilities, in order: resolve volumes/paths, derive logical time (for the root tablet,
 +   * from the max timestamp in its files), register configuration observers, replay any
 +   * write-ahead logs into memory, warm the table's classloader context, and finally create the
 +   * DatafileManager (last, since that can trigger a major compaction).
 +   */
 +  private Tablet(final TabletServer tabletServer, final Text location, final KeyExtent extent, final TabletResourceManager trm, final Configuration conf,
 +      final VolumeManager fs, final List<LogEntry> rawLogEntries, final SortedMap<FileRef,DataFileValue> rawDatafiles, String time,
 +      final TServerInstance lastLocation, Set<FileRef> scanFiles, long initFlushID, long initCompactID) throws IOException {
 +
 +    TabletFiles tabletPaths = VolumeUtil.updateTabletVolumes(tabletServer.getLock(), fs, extent, new TabletFiles(location.toString(), rawLogEntries,
 +        rawDatafiles));
 +
 +    Path locationPath;
 +
 +    // a ":" indicates the dir is already a fully-qualified URI; otherwise resolve it under the table dir
 +    if (tabletPaths.dir.contains(":")) {
 +      locationPath = new Path(tabletPaths.dir.toString());
 +    } else {
 +      locationPath = fs.getFullPath(FileType.TABLE, extent.getTableId().toString() + tabletPaths.dir.toString());
 +    }
 +
 +    final List<LogEntry> logEntries = tabletPaths.logEntries;
 +    final SortedMap<FileRef,DataFileValue> datafiles = tabletPaths.datafiles;
 +
 +    this.location = locationPath;
 +    this.lastLocation = lastLocation;
 +    this.tabletDirectory = tabletPaths.dir;
 +    this.conf = conf;
 +    this.acuTableConf = tabletServer.getTableConfiguration(extent);
 +
 +    this.fs = fs;
 +    this.extent = extent;
 +    this.tabletResources = trm;
 +
 +    this.lastFlushID = initFlushID;
 +    this.lastCompactID = initCompactID;
 +
 +    if (extent.isRootTablet()) {
 +      // the root tablet stores no TIME column, so derive logical time from the max
 +      // key timestamp across all of its files
 +      long rtime = Long.MIN_VALUE;
 +      for (FileRef ref : datafiles.keySet()) {
 +        Path path = ref.path();
 +        FileSystem ns = fs.getVolumeByPath(path).getFileSystem();
 +        FileSKVIterator reader = FileOperations.getInstance().openReader(path.toString(), true, ns, ns.getConf(), tabletServer.getTableConfiguration(extent));
 +        long maxTime = -1;
 +        try {
 +
 +          while (reader.hasTop()) {
 +            maxTime = Math.max(maxTime, reader.getTopKey().getTimestamp());
 +            reader.next();
 +          }
 +
 +        } finally {
 +          reader.close();
 +        }
 +
 +        if (maxTime > rtime) {
 +          time = TabletTime.LOGICAL_TIME_ID + "" + maxTime;
 +          rtime = maxTime;
 +        }
 +      }
 +    }
 +    if (time == null && datafiles.isEmpty() && extent.equals(RootTable.OLD_EXTENT)) {
 +      // recovery... old root tablet has no data, so time doesn't matter:
 +      time = TabletTime.LOGICAL_TIME_ID + "" + Long.MIN_VALUE;
 +    }
 +
 +    this.tabletServer = tabletServer;
 +    this.logId = tabletServer.createLogId(extent);
 +
 +    this.timer = new TabletStatsKeeper();
 +
 +    setupDefaultSecurityLabels(extent);
 +
 +    tabletMemory = new TabletMemory();
 +    tabletTime = TabletTime.getInstance(time);
 +    persistedTime = tabletTime.getTime();
 +
 +    // react to per-table property changes: constraints and default scan-time visibility
 +    acuTableConf.addObserver(configObserver = new ConfigurationObserver() {
 +
 +      private void reloadConstraints() {
 +        constraintChecker.set(new ConstraintChecker(acuTableConf));
 +      }
 +
 +      @Override
 +      public void propertiesChanged() {
 +        reloadConstraints();
 +
 +        try {
 +          setupDefaultSecurityLabels(extent);
 +        } catch (Exception e) {
 +          log.error("Failed to reload default security labels for extent: " + extent.toString());
 +        }
 +      }
 +
 +      @Override
 +      public void propertyChanged(String prop) {
 +        if (prop.startsWith(Property.TABLE_CONSTRAINT_PREFIX.getKey()))
 +          reloadConstraints();
 +        else if (prop.equals(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY.getKey())) {
 +          try {
 +            log.info("Default security labels changed for extent: " + extent.toString());
 +            setupDefaultSecurityLabels(extent);
 +          } catch (Exception e) {
 +            log.error("Failed to reload default security labels for extent: " + extent.toString());
 +          }
 +        }
 +
 +      }
 +
 +      @Override
 +      public void sessionExpired() {
 +        log.debug("Session expired, no longer updating per table props...");
 +      }
 +
 +    });
 +
 +    acuTableConf.getNamespaceConfiguration().addObserver(configObserver);
 +
 +    // Force a load of any per-table properties
 +    configObserver.propertiesChanged();
 +
 +    tabletResources.setTablet(this, acuTableConf);
 +    if (!logEntries.isEmpty()) {
 +      log.info("Starting Write-Ahead Log recovery for " + this.extent);
 +      // count[0] = entries used on tablet
 +      // count[1] = track max time from walog entries without timestamps
 +      final long[] count = new long[2];
 +      final CommitSession commitSession = tabletMemory.getCommitSession();
 +      count[1] = Long.MIN_VALUE;
 +      try {
 +        Set<String> absPaths = new HashSet<String>();
 +        for (FileRef ref : datafiles.keySet())
 +          absPaths.add(ref.path().toString());
 +
 +        tabletServer.recover(this.tabletServer.getFileSystem(), this, logEntries, absPaths, new MutationReceiver() {
 +          @Override
 +          public void receive(Mutation m) {
 +            // LogReader.printMutation(m);
 +            Collection<ColumnUpdate> muts = m.getUpdates();
 +            for (ColumnUpdate columnUpdate : muts) {
 +              if (!columnUpdate.hasTimestamp()) {
 +                // if it is not a user set timestamp, it must have been set
 +                // by the system
 +                count[1] = Math.max(count[1], columnUpdate.getTimestamp());
 +              }
 +            }
 +            tabletMemory.mutate(commitSession, Collections.singletonList(m));
 +            count[0]++;
 +          }
 +        });
 +
 +        if (count[1] != Long.MIN_VALUE) {
 +          tabletTime.useMaxTimeFromWALog(count[1]);
 +        }
 +        commitSession.updateMaxCommittedTime(tabletTime.getTime());
 +
 +        if (count[0] == 0) {
 +          log.debug("No replayed mutations applied, removing unused entries for " + extent);
 +          MetadataTableUtil.removeUnusedWALEntries(extent, logEntries, tabletServer.getLock());
 +          logEntries.clear();
 +        }
 +
 +      } catch (Throwable t) {
 +        if (acuTableConf.getBoolean(Property.TABLE_FAILURES_IGNORE)) {
 +          log.warn("Error recovering from log files: ", t);
 +        } else {
 +          throw new RuntimeException(t);
 +        }
 +      }
 +      // make some closed references that represent the recovered logs
 +      currentLogs = new HashSet<DfsLogger>();
 +      for (LogEntry logEntry : logEntries) {
 +        for (String log : logEntry.logSet) {
 +          currentLogs.add(new DfsLogger(tabletServer.getServerConfig(), log, logEntry.getColumnQualifier().toString()));
 +        }
 +      }
 +
 +      log.info("Write-Ahead Log recovery complete for " + this.extent + " (" + count[0] + " mutations applied, " + tabletMemory.getNumEntries()
 +          + " entries created)");
 +    }
 +
 +    String contextName = acuTableConf.get(Property.TABLE_CLASSPATH);
 +    if (contextName != null && !contextName.equals("")) {
 +      // initialize context classloader, instead of possibly waiting for it to initialize for a scan
 +      // TODO this could hang, causing other tablets to fail to load - ACCUMULO-1292
 +      AccumuloVFSClassLoader.getContextManager().getClassLoader(contextName);
 +    }
 +
 +    // do this last after tablet is completely setup because it
 +    // could cause major compaction to start
 +    datafileManager = new DatafileManager(datafiles);
 +
 +    computeNumEntries();
 +
 +    datafileManager.removeFilesAfterScan(scanFiles);
 +
 +    // look for hints of a failure on the previous tablet server
 +    if (!logEntries.isEmpty() || needsMajorCompaction(MajorCompactionReason.NORMAL)) {
 +      // look for any temp files hanging around
 +      removeOldTemporaryFiles();
 +    }
 +
 +    log.log(TLevel.TABLET_HIST, extent + " opened");
 +  }
 +
 +  /**
 +   * Deletes any "*_tmp" files left in the tablet directory by a previous tablet server.
 +   * Best-effort: failures are logged and do not abort tablet loading.
 +   */
 +  private void removeOldTemporaryFiles() {
 +    // remove any temporary files created by a previous tablet server
 +    try {
 +      for (FileStatus tmp : fs.globStatus(new Path(location, "*_tmp"))) {
 +        try {
 +          log.debug("Removing old temp file " + tmp.getPath());
 +          fs.delete(tmp.getPath());
 +        } catch (IOException ex) {
 +          log.error("Unable to remove old temp file " + tmp.getPath() + ": " + ex);
 +        }
 +      }
 +    } catch (IOException ex) {
 +      log.error("Error scanning for old temp files in " + location);
 +    }
 +  }
 +
 +  /**
 +   * Initializes the default scan-time visibility label from table configuration.
 +   * Metadata tablets always use an empty label; parse failures fall back to an empty label.
 +   */
 +  private void setupDefaultSecurityLabels(KeyExtent extent) {
 +    if (extent.isMeta()) {
 +      defaultSecurityLabel = new byte[0];
 +    } else {
 +      try {
 +        ColumnVisibility cv = new ColumnVisibility(acuTableConf.get(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY));
 +        this.defaultSecurityLabel = cv.getExpression();
 +      } catch (Exception e) {
 +        log.error(e, e);
 +        this.defaultSecurityLabel = new byte[0];
 +      }
 +    }
 +  }
 +
 +  /**
 +   * A KeyValue that owns deep copies of its key and value, so results remain valid
 +   * after the source iterator advances. Also tracks its approximate memory footprint.
 +   */
 +  public static class KVEntry extends KeyValue {
 +    public KVEntry(Key k, Value v) {
 +      super(new Key(k), Arrays.copyOf(v.get(), v.get().length));
 +    }
 +
 +    @Override
 +    public String toString() {
 +      return key.toString() + "=" + getValue();
 +    }
 +
 +    // raw byte count of key plus value
 +    int numBytes() {
 +      return key.getSize() + getValue().get().length;
 +    }
 +
 +    int estimateMemoryUsed() {
 +      return key.getSize() + getValue().get().length + (9 * 32); // overhead is 32 per object
 +    }
 +  }
 +
 +  /**
 +   * Runs the supplied iterator over each range, accumulating copied key/values into
 +   * {@code results} until {@code maxResultsSize} is exceeded or the tablet closes. Ranges not
 +   * fully processed (including all remaining ranges once a limit is hit) are recorded as
 +   * unfinished so the client can resume.
 +   */
 +  private LookupResult lookup(SortedKeyValueIterator<Key,Value> mmfi, List<Range> ranges, HashSet<Column> columnSet, ArrayList<KVEntry> results,
 +      long maxResultsSize) throws IOException {
 +
 +    LookupResult lookupResult = new LookupResult();
 +
 +    boolean exceededMemoryUsage = false;
 +    boolean tabletClosed = false;
 +
 +    Set<ByteSequence> cfset = null;
 +    if (columnSet.size() > 0)
 +      cfset = LocalityGroupUtil.families(columnSet);
 +
 +    for (Range range : ranges) {
 +
 +      // once a limit was hit or the tablet closed, remaining ranges are returned unfinished
 +      if (exceededMemoryUsage || tabletClosed) {
 +        lookupResult.unfinishedRanges.add(range);
 +        continue;
 +      }
 +
 +      int entriesAdded = 0;
 +
 +      try {
 +        if (cfset != null)
 +          mmfi.seek(range, cfset, true);
 +        else
 +          mmfi.seek(range, LocalityGroupUtil.EMPTY_CF_SET, false);
 +
 +        while (mmfi.hasTop()) {
 +          Key key = mmfi.getTopKey();
 +
 +          KVEntry kve = new KVEntry(key, mmfi.getTopValue());
 +          results.add(kve);
 +          entriesAdded++;
 +          lookupResult.bytesAdded += kve.estimateMemoryUsed();
 +          lookupResult.dataSize += kve.numBytes();
 +
 +          exceededMemoryUsage = lookupResult.bytesAdded > maxResultsSize;
 +
 +          if (exceededMemoryUsage) {
 +            addUnfinishedRange(lookupResult, range, key, false);
 +            break;
 +          }
 +
 +          mmfi.next();
 +        }
 +
 +      } catch (TooManyFilesException tmfe) {
 +        // treat this as a closed tablet, and let the client retry
 +        log.warn("Tablet " + getExtent() + " has too many files, batch lookup can not run");
 +        handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
 +        tabletClosed = true;
 +      } catch (IOException ioe) {
 +        if (shutdownInProgress()) {
 +          // assume HDFS shutdown hook caused this exception
 +          log.debug("IOException while shutdown in progress ", ioe);
 +          handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
 +          tabletClosed = true;
 +        } else {
 +          throw ioe;
 +        }
 +      } catch (IterationInterruptedException iie) {
 +        if (isClosed()) {
 +          handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
 +          tabletClosed = true;
 +        } else {
 +          throw iie;
 +        }
 +      } catch (TabletClosedException tce) {
 +        handleTabletClosedDuringScan(results, lookupResult, exceededMemoryUsage, range, entriesAdded);
 +        tabletClosed = true;
 +      }
 +
 +    }
 +
 +    return lookupResult;
 +  }
 +
 +  /**
 +   * Records a range that was interrupted by tablet closure: if entries were already returned for
 +   * it, only the remainder past the last returned key is marked unfinished; otherwise the whole
 +   * range is. Marks the lookup result closed.
 +   */
 +  private void handleTabletClosedDuringScan(ArrayList<KVEntry> results, LookupResult lookupResult, boolean exceededMemoryUsage, Range range, int entriesAdded) {
 +    if (exceededMemoryUsage)
 +      throw new IllegalStateException("tablet should not exceed memory usage or close, not both");
 +
 +    if (entriesAdded > 0)
 +      addUnfinishedRange(lookupResult, range, results.get(results.size() - 1).key, false);
 +    else
 +      lookupResult.unfinishedRanges.add(range);
 +
 +    lookupResult.closed = true;
 +  }
 +
 +  /**
 +   * Adds the portion of {@code range} after {@code key} to the unfinished list, unless the key is
 +   * already at or past the range's end.
 +   */
 +  private void addUnfinishedRange(LookupResult lookupResult, Range range, Key key, boolean inclusiveStartKey) {
 +    if (range.getEndKey() == null || key.compareTo(range.getEndKey()) < 0) {
 +      Range nlur = new Range(new Key(key), inclusiveStartKey, range.getEndKey(), range.isEndKeyInclusive());
 +      lookupResult.unfinishedRanges.add(nlur);
 +    }
 +  }
 +
 +  /** Callback for consuming batches of lookup results. */
 +  public static interface KVReceiver {
 +    void receive(List<KVEntry> matches) throws IOException;
 +  }
 +
 +  /** Outcome of a batch lookup: ranges still to process, memory/data accounting, and closure flag. */
 +  class LookupResult {
 +    List<Range> unfinishedRanges = new ArrayList<Range>();
 +    long bytesAdded = 0;
 +    long dataSize = 0;
 +    boolean closed = false;
 +  }
 +
 +  /**
 +   * Public batch-lookup entry point: merges and sorts the ranges, verifies each falls within this
 +   * tablet, builds a scan data source with the supplied iterators/authorizations, and delegates to
 +   * the private lookup. The data source is always closed, and query statistics are updated, even
 +   * when an exception is thrown.
 +   */
 +  public LookupResult lookup(List<Range> ranges, HashSet<Column> columns, Authorizations authorizations, ArrayList<KVEntry> results, long maxResultSize,
 +      List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, AtomicBoolean interruptFlag) throws IOException {
 +
 +    if (ranges.size() == 0) {
 +      return new LookupResult();
 +    }
 +
 +    ranges = Range.mergeOverlapping(ranges);
 +    Collections.sort(ranges);
 +
 +    Range tabletRange = extent.toDataRange();
 +    for (Range range : ranges) {
 +      // do a test to see if this range falls within the tablet, if it does not
 +      // then clip will throw an exception
 +      tabletRange.clip(range);
 +    }
 +
 +    ScanDataSource dataSource = new ScanDataSource(authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag);
 +
 +    LookupResult result = null;
 +
 +    try {
 +      SortedKeyValueIterator<Key,Value> iter = new SourceSwitchingIterator(dataSource);
 +      result = lookup(iter, ranges, columns, results, maxResultSize);
 +      return result;
 +    } catch (IOException ioe) {
 +      dataSource.close(true);
 +      throw ioe;
 +    } finally {
 +      // code in finally block because always want
 +      // to return mapfiles, even when exception is thrown
 +      dataSource.close(false);
 +
 +      synchronized (this) {
 +        queryCount += results.size();
 +        if (result != null)
 +          queryBytes += result.dataSize;
 +      }
 +    }
 +  }
 +
 +  /**
 +   * Reads the next batch of entries for a scan, stopping when the configured scan memory limit or
 +   * the requested entry count {@code num} is reached. The returned Batch carries the key to
 +   * continue from (null when the end of the tablet was reached) and null results when the tablet
 +   * was exhausted with nothing read.
 +   */
 +  private Batch nextBatch(SortedKeyValueIterator<Key,Value> iter, Range range, int num, Set<Column> columns) throws IOException {
 +
 +    // log.info("In nextBatch..");
 +
 +    List<KVEntry> results = new ArrayList<KVEntry>();
 +    Key key = null;
 +
 +    Value value;
 +    long resultSize = 0L;
 +    long resultBytes = 0L;
 +
 +    long maxResultsSize = acuTableConf.getMemoryInBytes(Property.TABLE_SCAN_MAXMEM);
 +
 +    if (columns.size() == 0) {
 +      iter.seek(range, LocalityGroupUtil.EMPTY_CF_SET, false);
 +    } else {
 +      iter.seek(range, LocalityGroupUtil.families(columns), true);
 +    }
 +
 +    Key continueKey = null;
 +    boolean skipContinueKey = false;
 +
 +    boolean endOfTabletReached = false;
 +    while (iter.hasTop()) {
 +
 +      value = iter.getTopValue();
 +      key = iter.getTopKey();
 +
 +      KVEntry kvEntry = new KVEntry(key, value); // copies key and value
 +      results.add(kvEntry);
 +      resultSize += kvEntry.estimateMemoryUsed();
 +      resultBytes += kvEntry.numBytes();
 +
 +      // stop once either the memory or entry-count limit is hit; resume after this key
 +      if (resultSize >= maxResultsSize || results.size() >= num) {
 +        continueKey = new Key(key);
 +        skipContinueKey = true;
 +        break;
 +      }
 +
 +      iter.next();
 +    }
 +
 +    if (iter.hasTop() == false) {
 +      endOfTabletReached = true;
 +    }
 +
 +    Batch retBatch = new Batch();
 +    retBatch.numBytes = resultBytes;
 +
 +    if (!endOfTabletReached) {
 +      retBatch.continueKey = continueKey;
 +      retBatch.skipContinueKey = skipContinueKey;
 +    } else {
 +      retBatch.continueKey = null;
 +    }
 +
 +    if (endOfTabletReached && results.size() == 0)
 +      retBatch.results = null;
 +    else
 +      retBatch.results = results;
 +
 +    return retBatch;
 +  }
 +
 +  /**
 +   * Determine if a JVM shutdown is in progress.
 +   * 
 +   */
 +  private boolean shutdownInProgress() {
 +    try {
 +      Runtime.getRuntime().removeShutdownHook(new Thread(new Runnable() {
 +        @Override
 +        public void run() {}
 +      }));
 +    } catch (IllegalStateException ise) {
 +      return true;
 +    }
 +
 +    return false;
 +  }
 +
 +  private class Batch {
 +    public boolean skipContinueKey;
 +    public List<KVEntry> results;
 +    public Key continueKey;
 +    public long numBytes;
 +  }
 +
 +  Scanner createScanner(Range range, int num, Set<Column> columns, Authorizations authorizations, List<IterInfo> ssiList, Map<String,Map<String,String>> ssio,
 +      boolean isolated, AtomicBoolean interruptFlag) {
 +    // do a test to see if this range falls within the tablet, if it does not
 +    // then clip will throw an exception
 +    extent.toDataRange().clip(range);
 +
 +    ScanOptions opts = new ScanOptions(num, authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag, isolated);
 +    return new Scanner(range, opts);
 +  }
 +
 +  class ScanBatch {
 +    boolean more;
 +    List<KVEntry> results;
 +
 +    ScanBatch(List<KVEntry> results, boolean more) {
 +      this.results = results;
 +      this.more = more;
 +    }
 +  }
 +
 +  class Scanner {
 +
 +    private ScanOptions options;
 +    private Range range;
 +    private SortedKeyValueIterator<Key,Value> isolatedIter;
 +    private ScanDataSource isolatedDataSource;
 +    private boolean sawException = false;
 +    private boolean scanClosed = false;
 +
 +    Scanner(Range range, ScanOptions options) {
 +      this.range = range;
 +      this.options = options;
 +    }
 +
 +    synchronized ScanBatch read() throws IOException, TabletClosedException {
 +
 +      if (sawException)
 +        throw new IllegalStateException("Tried to use scanner after exception occurred.");
 +
 +      if (scanClosed)
 +        throw new IllegalStateException("Tried to use scanner after it was closed.");
 +
 +      Batch results = null;
 +
 +      ScanDataSource dataSource;
 +
 +      if (options.isolated) {
 +        if (isolatedDataSource == null)
 +          isolatedDataSource = new ScanDataSource(options);
 +        dataSource = isolatedDataSource;
 +      } else {
 +        dataSource = new ScanDataSource(options);
 +      }
 +
 +      try {
 +
 +        SortedKeyValueIterator<Key,Value> iter;
 +
 +        if (options.isolated) {
 +          if (isolatedIter == null)
 +            isolatedIter = new SourceSwitchingIterator(dataSource, true);
 +          else
 +            isolatedDataSource.fileManager.reattach();
 +          iter = isolatedIter;
 +        } else {
 +          iter = new SourceSwitchingIterator(dataSource, false);
 +        }
 +
 +        results = nextBatch(iter, range, options.num, options.columnSet);
 +
 +        if (results.results == null) {
 +          range = null;
 +          return new ScanBatch(new ArrayList<Tablet.KVEntry>(), false);
 +        } else if (results.continueKey == null) {
 +          return new ScanBatch(results.results, false);
 +        } else {
 +          range = new Range(results.continueKey, !results.skipContinueKey, range.getEndKey(), range.isEndKeyInclusive());
 +          return new ScanBatch(results.results, true);
 +        }
 +
 +      } catch (IterationInterruptedException iie) {
 +        sawException = true;
 +        if (isClosed())
 +          throw new TabletClosedException(iie);
 +        else
 +          throw iie;
 +      } catch (IOException ioe) {
 +        if (shutdownInProgress()) {
 +          log.debug("IOException while shutdown in progress ", ioe);
 +          throw new TabletClosedException(ioe); // assume IOException was caused by execution of HDFS shutdown hook
 +        }
 +
 +        sawException = true;
 +        dataSource.close(true);
 +        throw ioe;
 +      } catch (RuntimeException re) {
 +        sawException = true;
 +        throw re;
 +      } finally {
 +        // code in finally block because always want
 +        // to return mapfiles, even when exception is thrown
 +        if (!options.isolated)
 +          dataSource.close(false);
 +        else if (dataSource.fileManager != null)
 +          dataSource.fileManager.detach();
 +
 +        synchronized (Tablet.this) {
 +          if (results != null && results.results != null) {
 +            long more = results.results.size();
 +            queryCount += more;
 +            queryBytes += results.numBytes;
 +          }
 +        }
 +      }
 +    }
 +
 +    // close and read are synchronized because cannot call close on the data source while it is in use
 +    // this could lead to the case where file iterators that are in use by a thread are returned
 +    // to the pool... this would be bad
 +    void close() {
 +      options.interruptFlag.set(true);
 +      synchronized (this) {
 +        scanClosed = true;
 +        if (isolatedDataSource != null)
 +          isolatedDataSource.close(false);
 +      }
 +    }
 +  }
 +
 +  static class ScanOptions {
 +
 +    // scan options
 +    Authorizations authorizations;
 +    byte[] defaultLabels;
 +    Set<Column> columnSet;
 +    List<IterInfo> ssiList;
 +    Map<String,Map<String,String>> ssio;
 +    AtomicBoolean interruptFlag;
 +    int num;
 +    boolean isolated;
 +
 +    ScanOptions(int num, Authorizations authorizations, byte[] defaultLabels, Set<Column> columnSet, List<IterInfo> ssiList,
 +        Map<String,Map<String,String>> ssio, AtomicBoolean interruptFlag, boolean isolated) {
 +      this.num = num;
 +      this.authorizations = authorizations;
 +      this.defaultLabels = defaultLabels;
 +      this.columnSet = columnSet;
 +      this.ssiList = ssiList;
 +      this.ssio = ssio;
 +      this.interruptFlag = interruptFlag;
 +      this.isolated = isolated;
 +    }
 +
 +  }
 +
 +  class ScanDataSource implements DataSource {
 +
 +    // data source state
 +    private ScanFileManager fileManager;
 +    private SortedKeyValueIterator<Key,Value> iter;
 +    private long expectedDeletionCount;
 +    private List<MemoryIterator> memIters = null;
 +    private long fileReservationId;
 +    private AtomicBoolean interruptFlag;
 +    private StatsIterator statsIterator;
 +
 +    ScanOptions options;
 +
 +    ScanDataSource(Authorizations authorizations, byte[] defaultLabels, HashSet<Column> columnSet, List<IterInfo> ssiList, Map<String,Map<String,String>> ssio,
 +        AtomicBoolean interruptFlag) {
 +      expectedDeletionCount = dataSourceDeletions.get();
 +      this.options = new ScanOptions(-1, authorizations, defaultLabels, columnSet, ssiList, ssio, interruptFlag, false);
 +      this.interruptFlag = interruptFlag;
 +    }
 +
 +    ScanDataSource(ScanOptions options) {
 +      expectedDeletionCount = dataSourceDeletions.get();
 +      this.options = options;
 +      this.interruptFlag = options.interruptFlag;
 +    }
 +
 +    @Override
 +    public DataSource getNewDataSource() {
 +      if (!isCurrent()) {
 +        // log.debug("Switching data sources during a scan");
 +        if (memIters != null) {
 +          tabletMemory.returnIterators(memIters);
 +          memIters = null;
 +          datafileManager.returnFilesForScan(fileReservationId);
 +          fileReservationId = -1;
 +        }
 +
 +        if (fileManager != null)
 +          fileManager.releaseOpenFiles(false);
 +
 +        expectedDeletionCount = dataSourceDeletions.get();
 +        iter = null;
 +
 +        return this;
 +      } else
 +        return this;
 +    }
 +
 +    @Override
 +    public boolean isCurrent() {
 +      return expectedDeletionCount == dataSourceDeletions.get();
 +    }
 +
 +    @Override
 +    public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
 +      if (iter == null)
 +        iter = createIterator();
 +      return iter;
 +    }
 +
 +    private SortedKeyValueIterator<Key,Value> createIterator() throws IOException {
 +
 +      Map<FileRef,DataFileValue> files;
 +
 +      synchronized (Tablet.this) {
 +
 +        if (memIters != null)
 +          throw new IllegalStateException("Tried to create new scan iterator w/o releasing memory");
 +
 +        if (Tablet.this.closed)
 +          throw new TabletClosedException();
 +
 +        if (interruptFlag.get())
 +          throw new IterationInterruptedException(extent.toString() + " " + interruptFlag.hashCode());
 +
 +        // only acquire the file manager when we know the tablet is open
 +        if (fileManager == null) {
 +          fileManager = tabletResources.newScanFileManager();
 +          activeScans.add(this);
 +        }
 +
 +        if (fileManager.getNumOpenFiles() != 0)
 +          throw new IllegalStateException("Tried to create new scan iterator w/o releasing files");
 +
 +        // set this before trying to get iterators in case
 +        // getIterators() throws an exception
 +        expectedDeletionCount = dataSourceDeletions.get();
 +
 +        memIters = tabletMemory.getIterators();
 +        Pair<Long,Map<FileRef,DataFileValue>> reservation = datafileManager.reserveFilesForScan();
 +        fileReservationId = reservation.getFirst();
 +        files = reservation.getSecond();
 +      }
 +
 +      Collection<InterruptibleIterator> mapfiles = fileManager.openFiles(files, options.isolated);
 +
 +      List<SortedKeyValueIterator<Key,Value>> iters = new ArrayList<SortedKeyValueIterator<Key,Value>>(mapfiles.size() + memIters.size());
 +
 +      iters.addAll(mapfiles);
 +      iters.addAll(memIters);
 +
 +      for (SortedKeyValueIterator<Key,Value> skvi : iters)
 +        ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
 +
 +      MultiIterator multiIter = new MultiIterator(iters, extent);
 +
 +      TabletIteratorEnvironment iterEnv = new TabletIteratorEnvironment(IteratorScope.scan, acuTableConf, fileManager, files);
 +
 +      statsIterator = new StatsIterator(multiIter, TabletServer.seekCount, scannedCount);
 +
 +      DeletingIterator delIter = new DeletingIterator(statsIterator, false);
 +
 +      ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);
 +
 +      ColumnQualifierFilter colFilter = new ColumnQualifierFilter(cfsi, options.columnSet);
 +
 +      VisibilityFilter visFilter = new VisibilityFilter(colFilter, options.authorizations, options.defaultLabels);
 +
 +      return iterEnv.getTopLevelIterator(IteratorUtil
 +          .loadIterators(IteratorScope.scan, visFilter, extent, acuTableConf, options.ssiList, options.ssio, iterEnv));
 +    }
 +
 +    private void close(boolean sawErrors) {
 +
 +      if (memIters != null) {
 +        tabletMemory.returnIterators(memIters);
 +        memIters = null;
 +        datafileManager.returnFilesForScan(fileReservationId);
 +        fileReservationId = -1;
 +      }
 +
 +      synchronized (Tablet.this) {
 +        activeScans.remove(this);
 +        if (activeScans.size() == 0)
 +          Tablet.this.notifyAll();
 +      }
 +
 +      if (fileManager != null) {
 +        fileManager.releaseOpenFiles(sawErrors);
 +        fileManager = null;
 +      }
 +
 +      if (statsIterator != null) {
 +        statsIterator.report();
 +      }
 +
 +    }
 +
 +    public void interrupt() {
 +      interruptFlag.set(true);
 +    }
 +
 +    @Override
 +    public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
 +      throw new UnsupportedOperationException();
 +    }
 +
++    @Override
++    public void setInterruptFlag(AtomicBoolean flag) {
++      throw new UnsupportedOperationException();
++    }
++    
 +  }
 +
 +  private DataFileValue minorCompact(Configuration conf, VolumeManager fs, InMemoryMap memTable, FileRef tmpDatafile, FileRef newDatafile, FileRef mergeFile,
 +      boolean hasQueueTime, long queued, CommitSession commitSession, long flushId, MinorCompactionReason mincReason) {
 +    boolean failed = false;
 +    long start = System.currentTimeMillis();
 +    timer.incrementStatusMinor();
 +
 +    long count = 0;
 +
 +    try {
 +      Span span = Trace.start("write");
 +      CompactionStats stats;
 +      try {
 +        count = memTable.getNumEntries();
 +
 +        DataFileValue dfv = null;
 +        if (mergeFile != null)
 +          dfv = datafileManager.getDatafileSizes().get(mergeFile);
 +
 +        MinorCompactor compactor = new MinorCompactor(conf, fs, memTable, mergeFile, dfv, tmpDatafile, acuTableConf, extent, mincReason);
 +        stats = compactor.call();
 +      } finally {
 +        span.stop();
 +      }
 +      span = Trace.start("bringOnline");
 +      try {
 +        datafileManager.bringMinorCompactionOnline(tmpDatafile, newDatafile, mergeFile, new DataFileValue(stats.getFileSize(), stats.getEntriesWritten()),
 +            commitSession, flushId);
 +      } finally {
 +        span.stop();
 +      }
 +      return new DataFileValue(stats.getFileSize(), stats.getEntriesWritten());
 +    } catch (Exception E) {
 +      failed = true;
 +      throw new RuntimeException(E);
 +    } catch (Error E) {
 +      // Weird errors like "OutOfMemoryError" when trying to create the thread for the compaction
 +      failed = true;
 +      throw new RuntimeException(E);
 +    } finally {
 +      try {
 +        tabletMemory.finalizeMinC();
 +      } catch (Throwable t) {
 +        log.error("Failed to free tablet memory", t);
 +      }
 +
 +      if (!failed) {
 +        lastMinorCompactionFinishTime = System.currentTimeMillis();
 +      }
 +      if (tabletServer.mincMetrics.isEnabled())
 +        tabletServer.mincMetrics.add(TabletServerMinCMetrics.minc, (lastMinorCompactionFinishTime - start));
 +      if (hasQueueTime) {
 +        timer.updateTime(Operation.MINOR, queued, start, count, failed);
 +        if (tabletServer.mincMetrics.isEnabled())
 +          tabletServer.mincMetrics.add(TabletServerMinCMetrics.queue, (start - queued));
 +      } else
 +        timer.updateTime(Operation.MINOR, start, count, failed);
 +    }
 +  }
 +
 +  private class MinorCompactionTask implements Runnable {
 +
 +    private long queued;
 +    private CommitSession commitSession;
 +    private DataFileValue stats;
 +    private FileRef mergeFile;
 +    private long flushId;
 +    private MinorCompactionReason mincReason;
 +
 +    MinorCompactionTask(FileRef mergeFile, CommitSession commitSession, long flushId, MinorCompactionReason mincReason) {
 +      queued = System.currentTimeMillis();
 +      minorCompactionWaitingToStart = true;
 +      this.commitSession = commitSession;
 +      this.mergeFile = mergeFile;
 +      this.flushId = flushId;
 +      this.mincReason = mincReason;
 +    }
 +
 +    @Override
 +    public void run() {
 +      minorCompactionWaitingToStart = false;
 +      minorCompactionInProgress = true;
 +      Span minorCompaction = Trace.on("minorCompaction");
 +      try {
 +        FileRef newMapfileLocation = getNextMapFilename(mergeFile == null ? "F" : "M");
 +        FileRef tmpFileRef = new FileRef(newMapfileLocation.path() + "_tmp");
 +        Span span = Trace.start("waitForCommits");
 +        synchronized (Tablet.this) {
 +          commitSession.waitForCommitsToFinish();
 +        }
 +        span.stop();
 +        span = Trace.start("start");
 +        while (true) {
 +          try {
 +            // the purpose of the minor compaction start event is to keep track of the filename... in the case
 +            // where the metadata table write for the minor compaction finishes and the process dies before
 +            // writing the minor compaction finish event, then the start event+filename in metadata table will
 +            // prevent recovery of duplicate data... the minor compaction start event could be written at any time
 +            // before the metadata write for the minor compaction
 +            tabletServer.minorCompactionStarted(commitSession, commitSession.getWALogSeq() + 1, newMapfileLocation.path().toString());
 +            break;
 +          } catch (IOException e) {
 +            log.warn("Failed to write to write ahead log " + e.getMessage(), e);
 +          }
 +        }
 +        span.stop();
 +        span = Trace.start("compact");
 +        this.stats = minorCompact(conf, fs, tabletMemory.getMinCMemTable(), tmpFileRef, newMapfileLocation, mergeFile, true, queued, commitSession, flushId,
 +            mincReason);
 +        span.stop();
 +
 +        if (needsSplit()) {
 +          tabletServer.executeSplit(Tablet.this);
 +        } else {
 +          initiateMajorCompaction(MajorCompactionReason.NORMAL);
 +        }
 +      } catch (Throwable t) {
 +        log.error("Unknown error during minor compaction for extent: " + getExtent(), t);
 +        throw new RuntimeException(t);
 +      } finally {
 +        minorCompactionInProgress = false;
 +        minorCompaction.data("extent", extent.toString());
 +        minorCompaction.data("numEntries", Long.toString(this.stats.getNumEntries()));
 +        minorCompaction.data("size", Long.toString(this.stats.getSize()));
 +        minorCompaction.stop();
 +      }
 +    }
 +  }
 +
 +  private synchronized MinorCompactionTask prepareForMinC(long flushId, MinorCompactionReason mincReason) {
 +    CommitSession oldCommitSession = tabletMemory.prepareForMinC();
 +    otherLogs = currentLogs;
 +    currentLogs = new HashSet<DfsLogger>();
 +
 +    FileRef mergeFile = null;
 +    if (mincReason != MinorCompactionReason.RECOVERY) {
 +      mergeFile = datafileManager.reserveMergingMinorCompactionFile();
 +    }
 +
 +    return new MinorCompactionTask(mergeFile, oldCommitSession, flushId, mincReason);
 +
 +  }
 +
 +  void flush(long tableFlushID) {
 +    boolean updateMetadata = false;
 +    boolean initiateMinor = false;
 +
 +    try {
 +
 +      synchronized (this) {
 +
 +        // only want one thing at a time to update flush ID to ensure that metadata table and tablet in memory state are consistent
 +        if (updatingFlushID)
 +          return;
 +
 +        if (lastFlushID >= tableFlushID)
 +          return;
 +
 +        if (closing || closed || tabletMemory.memoryReservedForMinC())
 +          return;
 +
 +        if (tabletMemory.getMemTable().getNumEntries() == 0) {
 +          lastFlushID = tableFlushID;
 +          updatingFlushID = true;
 +          updateMetadata = true;
 +        } else
 +          initiateMinor = true;
 +      }
 +
 +      if (updateMetadata) {
 +        Credentials creds = SystemCredentials.get();
 +        // if multiple threads were allowed to update this outside of a sync block, then it would be
 +        // a race condition
 +        MetadataTableUtil.updateTabletFlushID(extent, tableFlushID, creds, tabletServer.getLock());
 +      } else if (initiateMinor)
 +        initiateMinorCompaction(tableFlushID, MinorCompactionReason.USER);
 +
 +    } finally {
 +      if (updateMetadata) {
 +        synchronized (this) {
 +          updatingFlushID = false;
 +          this.notifyAll();
 +        }
 +      }
 +    }
 +
 +  }
 +
 +  boolean initiateMinorCompaction(MinorCompactionReason mincReason) {
 +    if (isClosed()) {
 +      // don't bother trying to get flush id if closed... could be closed after this check but that is ok... just trying to cut down on unneeded log messages....
 +      return false;
 +    }
 +
 +    // get the flush id before the new memmap is made available for write
 +    long flushId;
 +    try {
 +      flushId = getFlushID();
 +    } catch (NoNodeException e) {
 +      log.info("Asked to initiate MinC when there was no flush id " + getExtent() + " " + e.getMessage());
 +      return false;
 +    }
 +    return initiateMinorCompaction(flushId, mincReason);
 +  }
 +
 +  boolean minorCompactNow(MinorCompactionReason mincReason) {
 +    long flushId;
 +    try {
 +      flushId = getFlushID();
 +    } catch (NoNodeException e) {
 +      log.info("Asked to initiate MinC when there was no flush id " + getExtent() + " " + e.getMessage());
 +      return false;
 +    }
 +    MinorCompactionTask mct = createMinorCompactionTask(flushId, mincReason);
 +    if (mct == null)
 +      return false;
 +    mct.run();
 +    return true;
 +  }
 +
 +  boolean initiateMinorCompaction(long flushId, MinorCompactionReason mincReason) {
 +    MinorCompactionTask mct = createMinorCompactionTask(flushId, mincReason);
 +    if (mct == null)
 +      return false;
 +    tabletResources.executeMinorCompaction(mct);
 +    return true;
 +  }
 +
 +  private MinorCompactionTask createMinorCompactionTask(long flushId, MinorCompactionReason mincReason) {
 +    MinorCompactionTask mct;
 +    long t1, t2;
 +
 +    StringBuilder logMessage = null;
 +
 +    try {
 +      synchronized (this) {
 +        t1 = System.currentTimeMillis();
 +
 +        if (closing || closed || majorCompactionWaitingToStart || tabletMemory.memoryReservedForMinC() || tabletMemory.getMemTable().getNumEntries() == 0
 +            || updatingFlushID) {
 +
 +          logMessage = new StringBuilder();
 +
 +          logMessage.append(extent.toString());
 +          logMessage.append(" closing " + closing);
 +          logMessage.append(" closed " + closed);
 +          logMessage.append(" majorCompactionWaitingToStart " + majorCompactionWaitingToStart);
 +          if (tabletMemory != null)
 +            logMessage.append(" tabletMemory.memoryReservedForMinC() " + tabletMemory.memoryReservedForMinC());
 +          if (tabletMemory != null && tabletMemory.getMemTable() != null)
 +            logMessage.append(" tabletMemory.getMemTable().getNumEntries() " + tabletMemory.getMemTable().getNumEntries());
 +          logMessage.append(" updatingFlushID " + updatingFlushID);
 +
 +          return null;
 +        }
 +
 +        mct = prepareForMinC(flushId, mincReason);
 +        t2 = System.currentTimeMillis();
 +      }
 +    } finally {
 +      // log outside of sync block
 +      if (logMessage != null && log.isDebugEnabled())
 +        log.debug(logMessage);
 +    }
 +
 +    log.debug(String.format("MinC initiate lock %.2f secs", (t2 - t1) / 1000.0));
 +    return mct;
 +  }
 +
 +  long getFlushID() throws NoNodeException {
 +    try {
 +      String zTablePath = Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID() + Constants.ZTABLES + "/" + extent.getTableId()
 +          + Constants.ZTABLE_FLUSH_ID;
 +      return Long.parseLong(new String(ZooReaderWriter.getRetryingInstance().getData(zTablePath, null), Constants.UTF8));
 +    } catch (InterruptedException e) {
 +      throw new RuntimeException(e);
 +    } catch (NumberFormatException nfe) {
 +      throw new RuntimeException(nfe);
 +    } catch (KeeperException ke) {
 +      if (ke instanceof NoNodeException) {
 +        throw (NoNodeException) ke;
 +      } else {
 +        throw new RuntimeException(ke);
 +      }
 +    }
 +  }
 +
 +  long getCompactionCancelID() {
 +    String zTablePath = Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID() + Constants.ZTABLES + "/" + extent.getTableId()
 +        + Constants.ZTABLE_COMPACT_CANCEL_ID;
 +
 +    try {
 +      return Long.parseLong(new String(ZooReaderWriter.getRetryingInstance().getData(zTablePath, null), Constants.UTF8));
 +    } catch (KeeperException e) {
 +      throw new RuntimeException(e);
 +    } catch (InterruptedException e) {
 +      throw new RuntimeException(e);
 +    }
 +  }
 +
 +  Pair<Long,List<IteratorSetting>> getCompactionID() throws NoNodeException {
 +    try {
 +      String zTablePath = Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID() + Constants.ZTABLES + "/" + extent.getTableId()
 +          + Constants.ZTABLE_COMP

<TRUNCATED>

[04/10] Merge branch '1.5.2-SNAPSHOT' into 1.6.1-SNAPSHOT

Posted by el...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/d54e0fd8/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
----------------------------------------------------------------------
diff --cc server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
index 3932552,0000000..39c4c39
mode 100644,000000..100644
--- a/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
+++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
@@@ -1,557 -1,0 +1,584 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.tserver;
 +
 +import static org.junit.Assert.assertEquals;
 +import static org.junit.Assert.assertFalse;
 +import static org.junit.Assert.assertTrue;
++import static org.junit.Assert.fail;
 +
 +import java.io.File;
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.Arrays;
 +import java.util.Collections;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Set;
 +import java.util.concurrent.ExecutorService;
 +import java.util.concurrent.Executors;
 +import java.util.concurrent.TimeUnit;
++import java.util.concurrent.atomic.AtomicBoolean;
 +
 +import org.apache.accumulo.core.data.ArrayByteSequence;
 +import org.apache.accumulo.core.data.ByteSequence;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.Mutation;
 +import org.apache.accumulo.core.data.Range;
 +import org.apache.accumulo.core.data.Value;
++import org.apache.accumulo.core.iterators.IterationInterruptedException;
 +import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 +import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
 +import org.apache.accumulo.core.util.LocalityGroupUtil;
 +import org.apache.accumulo.server.client.HdfsZooInstance;
 +import org.apache.accumulo.server.conf.ZooConfiguration;
 +import org.apache.accumulo.tserver.InMemoryMap.MemoryIterator;
 +import org.apache.hadoop.io.Text;
 +import org.apache.log4j.Level;
 +import org.apache.log4j.Logger;
 +import org.junit.BeforeClass;
 +import org.junit.Ignore;
 +import org.junit.Rule;
 +import org.junit.Test;
 +import org.junit.rules.TemporaryFolder;
 +
 +public class InMemoryMapTest {
 +
 +  @BeforeClass
 +  public static void setUp() throws Exception {
 +    // suppress log messages having to do with not having an instance
 +    Logger.getLogger(ZooConfiguration.class).setLevel(Level.OFF);
 +    Logger.getLogger(HdfsZooInstance.class).setLevel(Level.OFF);
 +  }
 +
 +  @Rule
 +  public TemporaryFolder tempFolder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
 +
 +  public void mutate(InMemoryMap imm, String row, String column, long ts) {
 +    Mutation m = new Mutation(new Text(row));
 +    String[] sa = column.split(":");
 +    m.putDelete(new Text(sa[0]), new Text(sa[1]), ts);
 +
 +    imm.mutate(Collections.singletonList(m));
 +  }
 +
 +  public void mutate(InMemoryMap imm, String row, String column, long ts, String value) {
 +    Mutation m = new Mutation(new Text(row));
 +    String[] sa = column.split(":");
 +    m.put(new Text(sa[0]), new Text(sa[1]), ts, new Value(value.getBytes()));
 +
 +    imm.mutate(Collections.singletonList(m));
 +  }
 +
 +  static Key nk(String row, String column, long ts) {
 +    String[] sa = column.split(":");
 +    Key k = new Key(new Text(row), new Text(sa[0]), new Text(sa[1]), ts);
 +    return k;
 +  }
 +
 +  static void ae(SortedKeyValueIterator<Key,Value> dc, String row, String column, int ts, String val) throws IOException {
 +    assertTrue(dc.hasTop());
 +    assertEquals(nk(row, column, ts), dc.getTopKey());
 +    assertEquals(new Value(val.getBytes()), dc.getTopValue());
 +    dc.next();
 +
 +  }
 +
 +  static Set<ByteSequence> newCFSet(String... cfs) {
 +    HashSet<ByteSequence> cfSet = new HashSet<ByteSequence>();
 +    for (String cf : cfs) {
 +      cfSet.add(new ArrayByteSequence(cf));
 +    }
 +    return cfSet;
 +  }
 +
 +  @Test
 +  public void test2() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    MemoryIterator ski1 = imm.skvIterator();
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    MemoryIterator ski2 = imm.skvIterator();
 +
 +    ski1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    assertFalse(ski1.hasTop());
 +
 +    ski2.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    assertTrue(ski2.hasTop());
 +    ae(ski2, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski2.hasTop());
 +
 +  }
 +
 +  @Test
 +  public void test3() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq1", 3, "bar2");
 +    MemoryIterator ski1 = imm.skvIterator();
 +    mutate(imm, "r1", "foo:cq1", 3, "bar3");
 +
 +    mutate(imm, "r3", "foo:cq1", 3, "bar9");
 +    mutate(imm, "r3", "foo:cq1", 3, "bara");
 +
 +    MemoryIterator ski2 = imm.skvIterator();
 +
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski2.seek(new Range(new Text("r3")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski2, "r3", "foo:cq1", 3, "bara");
 +    ae(ski2, "r3", "foo:cq1", 3, "bar9");
 +    assertFalse(ski1.hasTop());
 +
 +  }
 +
 +  @Test
 +  public void test4() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq1", 3, "bar2");
 +    MemoryIterator ski1 = imm.skvIterator();
 +    mutate(imm, "r1", "foo:cq1", 3, "bar3");
 +
 +    imm.delete(0);
 +
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.seek(new Range(new Text("r2")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.seek(new Range(nk("r1", "foo:cq1", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.close();
 +  }
 +
 +  @Test
 +  public void test5() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq1", 3, "bar2");
 +    mutate(imm, "r1", "foo:cq1", 3, "bar3");
 +
 +    MemoryIterator ski1 = imm.skvIterator();
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar3");
 +
 +    imm.delete(0);
 +
 +    ae(ski1, "r1", "foo:cq1", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.close();
 +
 +    imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq2", 3, "bar2");
 +    mutate(imm, "r1", "foo:cq3", 3, "bar3");
 +
 +    ski1 = imm.skvIterator();
 +    ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +
 +    imm.delete(0);
 +
 +    ae(ski1, "r1", "foo:cq2", 3, "bar2");
 +    ae(ski1, "r1", "foo:cq3", 3, "bar3");
 +    assertFalse(ski1.hasTop());
 +
 +    ski1.close();
 +  }
 +
 +  @Test
 +  public void test6() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq2", 3, "bar2");
 +    mutate(imm, "r1", "foo:cq3", 3, "bar3");
 +    mutate(imm, "r1", "foo:cq4", 3, "bar4");
 +
 +    MemoryIterator ski1 = imm.skvIterator();
 +
 +    mutate(imm, "r1", "foo:cq5", 3, "bar5");
 +
 +    SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(null);
 +
 +    ski1.seek(new Range(nk("r1", "foo:cq1", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +
 +    dc.seek(new Range(nk("r1", "foo:cq2", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(dc, "r1", "foo:cq2", 3, "bar2");
 +
 +    imm.delete(0);
 +
 +    ae(ski1, "r1", "foo:cq2", 3, "bar2");
 +    ae(dc, "r1", "foo:cq3", 3, "bar3");
 +    ae(ski1, "r1", "foo:cq3", 3, "bar3");
 +    ae(dc, "r1", "foo:cq4", 3, "bar4");
 +    ae(ski1, "r1", "foo:cq4", 3, "bar4");
 +    assertFalse(ski1.hasTop());
 +    assertFalse(dc.hasTop());
 +
 +    ski1.seek(new Range(nk("r1", "foo:cq3", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +
 +    dc.seek(new Range(nk("r1", "foo:cq4", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(dc, "r1", "foo:cq4", 3, "bar4");
 +    assertFalse(dc.hasTop());
 +
 +    ae(ski1, "r1", "foo:cq3", 3, "bar3");
 +    ae(ski1, "r1", "foo:cq4", 3, "bar4");
 +    assertFalse(ski1.hasTop());
 +    assertFalse(dc.hasTop());
 +
 +    ski1.close();
 +  }
- 
-   private void deepCopyAndDelete(int interleaving) throws Exception {
++  
++  private void deepCopyAndDelete(int interleaving, boolean interrupt) throws Exception {
 +    // interleaving == 0 intentionally omitted, this runs the test w/o deleting in mem map
 +
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +    
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq2", 3, "bar2");
 +    
 +    MemoryIterator ski1 = imm.skvIterator();
 +    
-     if (interleaving == 1)
++    AtomicBoolean iflag = new AtomicBoolean(false);
++    ski1.setInterruptFlag(iflag);
++
++    if (interleaving == 1) {
 +      imm.delete(0);
++      if (interrupt)
++        iflag.set(true);
++    }
 +    
 +    SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(null);
 +
-     if (interleaving == 2)
++    if (interleaving == 2) {
 +      imm.delete(0);
++      if (interrupt)
++        iflag.set(true);
++    }
 +
 +    dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ski1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +
-     if (interleaving == 3)
++    if (interleaving == 3) {
 +      imm.delete(0);
++      if (interrupt)
++        iflag.set(true);
++    }
 +
 +    ae(dc, "r1", "foo:cq1", 3, "bar1");
 +    ae(ski1, "r1", "foo:cq1", 3, "bar1");
 +    dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +
-     if (interleaving == 4)
++    if (interleaving == 4) {
 +      imm.delete(0);
++      if (interrupt)
++        iflag.set(true);
++    }
 +
 +    ae(ski1, "r1", "foo:cq2", 3, "bar2");
 +    ae(dc, "r1", "foo:cq1", 3, "bar1");
 +    ae(dc, "r1", "foo:cq2", 3, "bar2");
 +    assertFalse(dc.hasTop());
 +    assertFalse(ski1.hasTop());
++
++    if (interrupt)
++      dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +  }
 +
 +  @Test
 +  public void testDeepCopyAndDelete() throws Exception {
 +    for (int i = 0; i <= 4; i++)
-       deepCopyAndDelete(i);
++      deepCopyAndDelete(i, false);
++
++    for (int i = 1; i <= 4; i++)
++      try {
++        deepCopyAndDelete(i, true);
++        fail("i = " + i);
++      } catch (IterationInterruptedException iie) {}
 +  }
 +   
 +  @Test
 +  public void testBug1() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    for (int i = 0; i < 20; i++) {
 +      mutate(imm, "r1", "foo:cq" + i, 3, "bar" + i);
 +    }
 +
 +    for (int i = 0; i < 20; i++) {
 +      mutate(imm, "r2", "foo:cq" + i, 3, "bar" + i);
 +    }
 +
 +    MemoryIterator ski1 = imm.skvIterator();
 +    ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(ski1);
 +
 +    imm.delete(0);
 +
 +    ArrayList<ByteSequence> columns = new ArrayList<ByteSequence>();
 +    columns.add(new ArrayByteSequence("bar"));
 +
 +    // this seek resulted in an infinite loop before a bug was fixed
 +    cfsi.seek(new Range("r1"), columns, true);
 +
 +    assertFalse(cfsi.hasTop());
 +
 +    ski1.close();
 +  }
 +
 +  @Test
 +  public void testSeekBackWards() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    mutate(imm, "r1", "foo:cq1", 3, "bar1");
 +    mutate(imm, "r1", "foo:cq2", 3, "bar2");
 +    mutate(imm, "r1", "foo:cq3", 3, "bar3");
 +    mutate(imm, "r1", "foo:cq4", 3, "bar4");
 +
 +    MemoryIterator skvi1 = imm.skvIterator();
 +
 +    skvi1.seek(new Range(nk("r1", "foo:cq3", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(skvi1, "r1", "foo:cq3", 3, "bar3");
 +
 +    skvi1.seek(new Range(nk("r1", "foo:cq1", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(skvi1, "r1", "foo:cq1", 3, "bar1");
 +
 +  }
 +
 +  @Test
 +  public void testDuplicateKey() throws Exception {
 +    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +
 +    Mutation m = new Mutation(new Text("r1"));
 +    m.put(new Text("foo"), new Text("cq"), 3, new Value("v1".getBytes()));
 +    m.put(new Text("foo"), new Text("cq"), 3, new Value("v2".getBytes()));
 +    imm.mutate(Collections.singletonList(m));
 +
 +    MemoryIterator skvi1 = imm.skvIterator();
 +    skvi1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    ae(skvi1, "r1", "foo:cq", 3, "v2");
 +    ae(skvi1, "r1", "foo:cq", 3, "v1");
 +  }
 +
 +  private static final Logger log = Logger.getLogger(InMemoryMapTest.class);
 +
 +  static long sum(long[] counts) {
 +    long result = 0;
 +    for (int i = 0; i < counts.length; i++)
 +      result += counts[i];
 +    return result;
 +  }
 +
 +  // - hard to get this timing test to run well on apache build machines
 +  @Test
 +  @Ignore
 +  public void parallelWriteSpeed() throws InterruptedException, IOException {
 +    List<Double> timings = new ArrayList<Double>();
 +    for (int threads : new int[] {1, 2, 16, /* 64, 256 */}) {
 +      final long now = System.currentTimeMillis();
 +      final long counts[] = new long[threads];
 +      final InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 +      ExecutorService e = Executors.newFixedThreadPool(threads);
 +      for (int j = 0; j < threads; j++) {
 +        final int threadId = j;
 +        e.execute(new Runnable() {
 +          @Override
 +          public void run() {
 +            while (System.currentTimeMillis() - now < 1000) {
 +              for (int k = 0; k < 1000; k++) {
 +                Mutation m = new Mutation("row");
 +                m.put("cf", "cq", new Value("v".getBytes()));
 +                List<Mutation> mutations = Collections.singletonList(m);
 +                imm.mutate(mutations);
 +                counts[threadId]++;
 +              }
 +            }
 +          }
 +        });
 +      }
 +      e.shutdown();
 +      e.awaitTermination(10, TimeUnit.SECONDS);
 +      imm.delete(10000);
 +      double mutationsPerSecond = sum(counts) / ((System.currentTimeMillis() - now) / 1000.);
 +      timings.add(mutationsPerSecond);
 +      log.info(String.format("%.1f mutations per second with %d threads", mutationsPerSecond, threads));
 +    }
 +    // verify that more threads doesn't go a lot faster, or a lot slower than one thread
 +    for (int i = 0; i < timings.size(); i++) {
 +      double ratioFirst = timings.get(0) / timings.get(i);
 +      assertTrue(ratioFirst < 3);
 +      assertTrue(ratioFirst > 0.3);
 +    }
 +  }
 +
 +  @Test
 +  public void testLocalityGroups() throws Exception {
 +
 +    Map<String,Set<ByteSequence>> lggroups1 = new HashMap<String,Set<ByteSequence>>();
 +    lggroups1.put("lg1", newCFSet("cf1", "cf2"));
 +    lggroups1.put("lg2", newCFSet("cf3", "cf4"));
 +
 +    InMemoryMap imm = new InMemoryMap(lggroups1, false, tempFolder.newFolder().getAbsolutePath());
 +
 +    Mutation m1 = new Mutation("r1");
 +    m1.put("cf1", "x", 2, "1");
 +    m1.put("cf1", "y", 2, "2");
 +    m1.put("cf3", "z", 2, "3");
 +    m1.put("foo", "b", 2, "9");
 +
 +    Mutation m2 = new Mutation("r2");
 +    m2.put("cf2", "x", 3, "5");
 +
 +    Mutation m3 = new Mutation("r3");
 +    m3.put("foo", "b", 4, "6");
 +
 +    Mutation m4 = new Mutation("r4");
 +    m4.put("foo", "b", 5, "7");
 +    m4.put("cf4", "z", 5, "8");
 +
 +    Mutation m5 = new Mutation("r5");
 +    m5.put("cf3", "z", 6, "A");
 +    m5.put("cf4", "z", 6, "B");
 +
 +    imm.mutate(Arrays.asList(m1, m2, m3, m4, m5));
 +
 +    MemoryIterator iter1 = imm.skvIterator();
 +
 +    seekLocalityGroups(iter1);
 +    SortedKeyValueIterator<Key,Value> dc1 = iter1.deepCopy(null);
 +    seekLocalityGroups(dc1);
 +
 +    assertTrue(imm.getNumEntries() == 10);
 +    assertTrue(imm.estimatedSizeInBytes() > 0);
 +
 +    imm.delete(0);
 +
 +    seekLocalityGroups(iter1);
 +    seekLocalityGroups(dc1);
 +    // TODO uncomment following when ACCUMULO-1628 is fixed
 +    // seekLocalityGroups(iter1.deepCopy(null));
 +  }
 +
 +  private void seekLocalityGroups(SortedKeyValueIterator<Key,Value> iter1) throws IOException {
 +    iter1.seek(new Range(), newCFSet("cf1"), true);
 +    ae(iter1, "r1", "cf1:x", 2, "1");
 +    ae(iter1, "r1", "cf1:y", 2, "2");
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range("r2", "r4"), newCFSet("cf1"), true);
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), newCFSet("cf3"), true);
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    ae(iter1, "r5", "cf3:z", 6, "A");
 +    ae(iter1, "r5", "cf4:z", 6, "B");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), newCFSet("foo"), true);
 +    ae(iter1, "r1", "foo:b", 2, "9");
 +    ae(iter1, "r3", "foo:b", 4, "6");
 +    ae(iter1, "r4", "foo:b", 5, "7");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), newCFSet("cf1", "cf3"), true);
 +    ae(iter1, "r1", "cf1:x", 2, "1");
 +    ae(iter1, "r1", "cf1:y", 2, "2");
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    ae(iter1, "r5", "cf3:z", 6, "A");
 +    ae(iter1, "r5", "cf4:z", 6, "B");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range("r2", "r4"), newCFSet("cf1", "cf3"), true);
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), newCFSet("cf1", "cf3", "foo"), true);
 +    assertAll(iter1);
 +
 +    iter1.seek(new Range("r1", "r2"), newCFSet("cf1", "cf3", "foo"), true);
 +    ae(iter1, "r1", "cf1:x", 2, "1");
 +    ae(iter1, "r1", "cf1:y", 2, "2");
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r1", "foo:b", 2, "9");
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 +    assertAll(iter1);
 +
 +    iter1.seek(new Range(), newCFSet("cf1"), false);
 +    assertAll(iter1);
 +
 +    iter1.seek(new Range(), newCFSet("cf1", "cf2"), false);
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r1", "foo:b", 2, "9");
 +    ae(iter1, "r3", "foo:b", 4, "6");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    ae(iter1, "r4", "foo:b", 5, "7");
 +    ae(iter1, "r5", "cf3:z", 6, "A");
 +    ae(iter1, "r5", "cf4:z", 6, "B");
 +    assertFalse(iter1.hasTop());
 +
 +    iter1.seek(new Range("r2"), newCFSet("cf1", "cf3", "foo"), true);
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    assertFalse(iter1.hasTop());
 +  }
 +
 +  private void assertAll(SortedKeyValueIterator<Key,Value> iter1) throws IOException {
 +    ae(iter1, "r1", "cf1:x", 2, "1");
 +    ae(iter1, "r1", "cf1:y", 2, "2");
 +    ae(iter1, "r1", "cf3:z", 2, "3");
 +    ae(iter1, "r1", "foo:b", 2, "9");
 +    ae(iter1, "r2", "cf2:x", 3, "5");
 +    ae(iter1, "r3", "foo:b", 4, "6");
 +    ae(iter1, "r4", "cf4:z", 5, "8");
 +    ae(iter1, "r4", "foo:b", 5, "7");
 +    ae(iter1, "r5", "cf3:z", 6, "A");
 +    ae(iter1, "r5", "cf4:z", 6, "B");
 +    assertFalse(iter1.hasTop());
 +  }
 +}


[10/10] git commit: Merge branch '1.6.1-SNAPSHOT'

Posted by el...@apache.org.
Merge branch '1.6.1-SNAPSHOT'

Conflicts:
	server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
	server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/6b5275e1
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/6b5275e1
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/6b5275e1

Branch: refs/heads/master
Commit: 6b5275e1b149597ff30cf2917fee4c240c9f1b04
Parents: abf966e d54e0fd
Author: Josh Elser <el...@apache.org>
Authored: Thu Sep 11 17:45:47 2014 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Thu Sep 11 17:45:47 2014 -0700

----------------------------------------------------------------------
 .../system/SourceSwitchingIterator.java         | 20 ++++------
 .../system/SourceSwitchingIteratorTest.java     | 38 +++++++++++++++++-
 .../apache/accumulo/tserver/FileManager.java    | 13 +++++++
 .../apache/accumulo/tserver/InMemoryMap.java    | 21 +++++++---
 .../accumulo/tserver/tablet/ScanDataSource.java |  7 +++-
 .../accumulo/tserver/InMemoryMapTest.java       | 41 ++++++++++++++++----
 6 files changed, 113 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/6b5275e1/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/accumulo/blob/6b5275e1/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
index b3d0b92,2e15767..9a1117d
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
@@@ -557,7 -557,9 +559,9 @@@ public class InMemoryMap 
          Configuration conf = CachedConfiguration.getInstance();
          FileSystem fs = TraceFileSystem.wrap(FileSystem.getLocal(conf));
          
 -        reader = new RFileOperations().openReader(memDumpFile, true, fs, conf, ServerConfiguration.getSiteConfiguration());
 +        reader = new RFileOperations().openReader(memDumpFile, true, fs, conf, SiteConfiguration.getInstance());
+         if (iflag != null)
+           reader.setInterruptFlag(iflag);
        }
  
        return reader;

http://git-wip-us.apache.org/repos/asf/accumulo/blob/6b5275e1/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java
index 5464731,0000000..fe4b16b
mode 100644,000000..100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java
@@@ -1,222 -1,0 +1,227 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.tserver.tablet;
 +
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.Collection;
 +import java.util.HashSet;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.concurrent.atomic.AtomicBoolean;
 +
 +import org.apache.accumulo.core.data.Column;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.Value;
 +import org.apache.accumulo.core.data.thrift.IterInfo;
 +import org.apache.accumulo.core.iterators.IterationInterruptedException;
 +import org.apache.accumulo.core.iterators.IteratorEnvironment;
 +import org.apache.accumulo.core.iterators.IteratorUtil;
 +import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
 +import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 +import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
 +import org.apache.accumulo.core.iterators.system.ColumnQualifierFilter;
 +import org.apache.accumulo.core.iterators.system.DeletingIterator;
 +import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
 +import org.apache.accumulo.core.iterators.system.MultiIterator;
 +import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
 +import org.apache.accumulo.core.iterators.system.StatsIterator;
 +import org.apache.accumulo.core.iterators.system.VisibilityFilter;
 +import org.apache.accumulo.core.metadata.schema.DataFileValue;
 +import org.apache.accumulo.core.security.Authorizations;
 +import org.apache.accumulo.core.util.Pair;
 +import org.apache.accumulo.server.fs.FileRef;
 +import org.apache.accumulo.tserver.FileManager.ScanFileManager;
 +import org.apache.accumulo.tserver.InMemoryMap.MemoryIterator;
 +import org.apache.accumulo.tserver.TabletIteratorEnvironment;
 +import org.apache.accumulo.tserver.TabletServer;
 +
 +class ScanDataSource implements DataSource {
 +
 +  // data source state
 +  private final Tablet tablet;
 +  private ScanFileManager fileManager;
 +  private SortedKeyValueIterator<Key,Value> iter;
 +  private long expectedDeletionCount;
 +  private List<MemoryIterator> memIters = null;
 +  private long fileReservationId;
 +  private AtomicBoolean interruptFlag;
 +  private StatsIterator statsIterator;
 +
 +  private final ScanOptions options;
 +
 +  ScanDataSource(Tablet tablet, Authorizations authorizations, byte[] defaultLabels, HashSet<Column> columnSet, List<IterInfo> ssiList, Map<String,Map<String,String>> ssio,
 +      AtomicBoolean interruptFlag) {
 +    this.tablet = tablet;
 +    expectedDeletionCount = tablet.getDataSourceDeletions();
 +    this.options = new ScanOptions(-1, authorizations, defaultLabels, columnSet, ssiList, ssio, interruptFlag, false);
 +    this.interruptFlag = interruptFlag;
 +  }
 +
 +  ScanDataSource(Tablet tablet, ScanOptions options) {
 +    this.tablet = tablet;
 +    expectedDeletionCount = tablet.getDataSourceDeletions();
 +    this.options = options;
 +    this.interruptFlag = options.getInterruptFlag();
 +  }
 +
 +  @Override
 +  public DataSource getNewDataSource() {
 +    if (!isCurrent()) {
 +      // log.debug("Switching data sources during a scan");
 +      if (memIters != null) {
 +        tablet.getTabletMemory().returnIterators(memIters);
 +        memIters = null;
 +        tablet.getDatafileManager().returnFilesForScan(fileReservationId);
 +        fileReservationId = -1;
 +      }
 +
 +      if (fileManager != null)
 +        fileManager.releaseOpenFiles(false);
 +
 +      expectedDeletionCount = tablet.getDataSourceDeletions();
 +      iter = null;
 +
 +      return this;
 +    } else
 +      return this;
 +  }
 +
 +  @Override
 +  public boolean isCurrent() {
 +    return expectedDeletionCount == tablet.getDataSourceDeletions();
 +  }
 +
 +  @Override
 +  public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
 +    if (iter == null)
 +      iter = createIterator();
 +    return iter;
 +  }
 +
 +  private SortedKeyValueIterator<Key,Value> createIterator() throws IOException {
 +
 +    Map<FileRef,DataFileValue> files;
 +
 +    synchronized (tablet) {
 +
 +      if (memIters != null)
 +        throw new IllegalStateException("Tried to create new scan iterator w/o releasing memory");
 +
 +      if (tablet.isClosed())
 +        throw new TabletClosedException();
 +
 +      if (interruptFlag.get())
 +        throw new IterationInterruptedException(tablet.getExtent().toString() + " " + interruptFlag.hashCode());
 +
 +      // only acquire the file manager when we know the tablet is open
 +      if (fileManager == null) {
 +        fileManager = tablet.getTabletResources().newScanFileManager();
 +        tablet.addActiveScans(this);
 +      }
 +
 +      if (fileManager.getNumOpenFiles() != 0)
 +        throw new IllegalStateException("Tried to create new scan iterator w/o releasing files");
 +
 +      // set this before trying to get iterators in case
 +      // getIterators() throws an exception
 +      expectedDeletionCount = tablet.getDataSourceDeletions();
 +
 +      memIters = tablet.getTabletMemory().getIterators();
 +      Pair<Long,Map<FileRef,DataFileValue>> reservation = tablet.getDatafileManager().reserveFilesForScan();
 +      fileReservationId = reservation.getFirst();
 +      files = reservation.getSecond();
 +    }
 +
 +    Collection<InterruptibleIterator> mapfiles = fileManager.openFiles(files, options.isIsolated());
 +
 +    List<SortedKeyValueIterator<Key,Value>> iters = new ArrayList<SortedKeyValueIterator<Key,Value>>(mapfiles.size() + memIters.size());
 +
 +    iters.addAll(mapfiles);
 +    iters.addAll(memIters);
 +
 +    for (SortedKeyValueIterator<Key,Value> skvi : iters)
 +      ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
 +
 +    MultiIterator multiIter = new MultiIterator(iters, tablet.getExtent());
 +
 +    TabletIteratorEnvironment iterEnv = new TabletIteratorEnvironment(IteratorScope.scan, tablet.getTableConfiguration(), fileManager, files);
 +
 +    statsIterator = new StatsIterator(multiIter, TabletServer.seekCount, tablet.getScannedCounter());
 +
 +    DeletingIterator delIter = new DeletingIterator(statsIterator, false);
 +
 +    ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);
 +
 +    ColumnQualifierFilter colFilter = new ColumnQualifierFilter(cfsi, options.getColumnSet());
 +
 +    VisibilityFilter visFilter = new VisibilityFilter(colFilter, options.getAuthorizations(), options.getDefaultLabels());
 +
 +    return iterEnv.getTopLevelIterator(IteratorUtil
 +        .loadIterators(IteratorScope.scan, visFilter, tablet.getExtent(), tablet.getTableConfiguration(), options.getSsiList(), options.getSsio(), iterEnv));
 +  }
 +
 +  void close(boolean sawErrors) {
 +
 +    if (memIters != null) {
 +      tablet.getTabletMemory().returnIterators(memIters);
 +      memIters = null;
 +      tablet.getDatafileManager().returnFilesForScan(fileReservationId);
 +      fileReservationId = -1;
 +    }
 +
 +    synchronized (tablet) {
 +      if (tablet.removeScan(this) == 0)
 +        tablet.notifyAll();
 +    }
 +
 +    if (fileManager != null) {
 +      fileManager.releaseOpenFiles(sawErrors);
 +      fileManager = null;
 +    }
 +
 +    if (statsIterator != null) {
 +      statsIterator.report();
 +    }
 +
 +  }
 +
 +  public void interrupt() {
 +    interruptFlag.set(true);
 +  }
 +
 +  @Override
 +  public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
 +    throw new UnsupportedOperationException();
 +  }
 +
 +  public void reattachFileManager() throws IOException {
 +    if (fileManager != null)
 +      fileManager.reattach();
 +  }
 +  
 +  public void detachFileManager() {
 +    if (fileManager != null)
 +      fileManager.detach();
 +  }
 +
- }
++  @Override
++  public void setInterruptFlag(AtomicBoolean flag) {
++    throw new UnsupportedOperationException();
++  }
++
++}


[03/10] git commit: ACCUMULO-1628 Fixes issue after previous changes which interrupted a deep-copy

Posted by el...@apache.org.
ACCUMULO-1628 Fixes issue after previous changes which interrupted a deep-copy

Pushes the interrupt flag from the SourceSwitchingIterator down to the
FileManager and InMemoryMap. This should avoid passing the interrupt
into a deep copy which isn't supported. Adds some more tests which
previously caused the edge case which is now fixed.

Signed-off-by: Josh Elser <el...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/7699e1f4
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/7699e1f4
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/7699e1f4

Branch: refs/heads/master
Commit: 7699e1f43c4ee51bfa4be1e9e73ea722f934a3d6
Parents: c335fca
Author: Keith Turner <kt...@apache.org>
Authored: Thu Sep 11 16:31:08 2014 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Thu Sep 11 16:31:08 2014 -0700

----------------------------------------------------------------------
 .../system/SourceSwitchingIterator.java         | 20 ++++-------
 .../system/SourceSwitchingIteratorTest.java     | 38 +++++++++++++++++++-
 .../server/tabletserver/FileManager.java        | 13 +++++++
 .../server/tabletserver/InMemoryMap.java        | 21 ++++++++---
 .../accumulo/server/tabletserver/Tablet.java    |  5 +++
 .../server/tabletserver/InMemoryMapTest.java    | 38 ++++++++++++++++----
 6 files changed, 110 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
index 33d0ebf..6c40176 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIterator.java
@@ -47,6 +47,8 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     DataSource getDeepCopyDataSource(IteratorEnvironment env);
 
     SortedKeyValueIterator<Key,Value> iterator() throws IOException;
+
+    void setInterruptFlag(AtomicBoolean flag);
   }
 
   private DataSource source;
@@ -60,20 +62,18 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
   private Collection<ByteSequence> columnFamilies;
 
   private boolean onlySwitchAfterRow;
-  private AtomicBoolean iflag;
 
   private final List<SourceSwitchingIterator> copies;
 
-  private SourceSwitchingIterator(DataSource source, boolean onlySwitchAfterRow, List<SourceSwitchingIterator> copies, AtomicBoolean iflag) {
+  private SourceSwitchingIterator(DataSource source, boolean onlySwitchAfterRow, List<SourceSwitchingIterator> copies) {
     this.source = source;
     this.onlySwitchAfterRow = onlySwitchAfterRow;
     this.copies = copies;
-    this.iflag = iflag;
     copies.add(this);
   }
 
   public SourceSwitchingIterator(DataSource source, boolean onlySwitchAfterRow) {
-    this(source, onlySwitchAfterRow, Collections.synchronizedList(new ArrayList<SourceSwitchingIterator>()), null);
+    this(source, onlySwitchAfterRow, Collections.synchronizedList(new ArrayList<SourceSwitchingIterator>()));
   }
 
   public SourceSwitchingIterator(DataSource source) {
@@ -82,7 +82,7 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
 
   @Override
   public synchronized SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
-    return new SourceSwitchingIterator(source.getDeepCopyDataSource(env), onlySwitchAfterRow, copies, iflag);
+    return new SourceSwitchingIterator(source.getDeepCopyDataSource(env), onlySwitchAfterRow, copies);
   }
 
   @Override
@@ -149,9 +149,6 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     while (!source.isCurrent()) {
       source = source.getNewDataSource();
       iter = source.iterator();
-      if (iflag != null)
-        ((InterruptibleIterator) iter).setInterruptFlag(iflag);
-
       return true;
     }
 
@@ -164,11 +161,8 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     this.inclusive = inclusive;
     this.columnFamilies = columnFamilies;
 
-    if (iter == null) {
+    if (iter == null)
       iter = source.iterator();
-      if (iflag != null)
-        ((InterruptibleIterator) iter).setInterruptFlag(iflag);
-    }
 
     readNext(true);
   }
@@ -196,10 +190,10 @@ public class SourceSwitchingIterator implements SortedKeyValueIterator<Key,Value
     if (copies.size() != 1)
       throw new IllegalStateException("setInterruptFlag() called after deep copies made " + copies.size());
 
-    this.iflag = flag;
     if (iter != null)
       ((InterruptibleIterator) iter).setInterruptFlag(flag);
 
+    source.setInterruptFlag(flag);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java b/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
index a52b141..23f08a8 100644
--- a/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/iterators/system/SourceSwitchingIteratorTest.java
@@ -19,6 +19,7 @@ package org.apache.accumulo.core.iterators.system;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import junit.framework.TestCase;
 
@@ -26,6 +27,7 @@ import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IterationInterruptedException;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.SortedMapIterator;
@@ -59,6 +61,7 @@ public class SourceSwitchingIteratorTest extends TestCase {
     DataSource next;
     SortedKeyValueIterator<Key,Value> iter;
     List<TestDataSource> copies = new ArrayList<TestDataSource>();
+    AtomicBoolean iflag;
     
     TestDataSource(SortedKeyValueIterator<Key,Value> iter) {
       this(iter, new ArrayList<TestDataSource>());
@@ -82,6 +85,8 @@ public class SourceSwitchingIteratorTest extends TestCase {
     
     @Override
     public SortedKeyValueIterator<Key,Value> iterator() {
+      if (iflag != null)
+        ((InterruptibleIterator) iter).setInterruptFlag(iflag);
       return iter;
     }
     
@@ -98,7 +103,11 @@ public class SourceSwitchingIteratorTest extends TestCase {
           tds.next = new TestDataSource(next.iter.deepCopy(null), next.copies);
       }
     }
-    
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      this.iflag = flag;
+    }
   }
   
   public void test1() throws Exception {
@@ -236,4 +245,31 @@ public class SourceSwitchingIteratorTest extends TestCase {
     ane(dc1, "r2", "cf1", "cq2", 6, "v4", true);
     assertFalse(dc1.hasTop());
   }
+
+  public void testSetInterrupt() throws Exception {
+
+    TreeMap<Key,Value> tm1 = new TreeMap<Key,Value>();
+    put(tm1, "r1", "cf1", "cq1", 5, "v1");
+
+    SortedMapIterator smi = new SortedMapIterator(tm1);
+    TestDataSource tds = new TestDataSource(smi);
+    SourceSwitchingIterator ssi = new SourceSwitchingIterator(tds, false);
+
+    AtomicBoolean flag = new AtomicBoolean();
+    ssi.setInterruptFlag(flag);
+
+    assertSame(flag, tds.iflag);
+
+    ssi.seek(new Range("r1"), new ArrayList<ByteSequence>(), false);
+    ane(ssi, "r1", "cf1", "cq1", 5, "v1", true);
+    assertFalse(ssi.hasTop());
+
+    flag.set(true);
+
+    try {
+      ssi.seek(new Range("r1"), new ArrayList<ByteSequence>(), false);
+      fail("expected to see IterationInterruptedException");
+    } catch (IterationInterruptedException iie) {}
+
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
index 9613cca..cd5ca9c 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/FileManager.java
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.concurrent.Semaphore;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.Key;
@@ -379,6 +380,7 @@ public class FileManager {
     private boolean current = true;
     private IteratorEnvironment env;
     private String file;
+    private AtomicBoolean iflag;
     
     FileDataSource(String file, SortedKeyValueIterator<Key,Value> iter) {
       this.file = file;
@@ -411,6 +413,8 @@ public class FileManager {
     
     @Override
     public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
+      if (iflag != null)
+        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
       return iter;
     }
     
@@ -426,11 +430,20 @@ public class FileManager {
     void setIterator(SortedKeyValueIterator<Key,Value> iter) {
       current = false;
       this.iter = iter;
+
+      if (iflag != null)
+        ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
+
       for (FileDataSource fds : deepCopies) {
         fds.current = false;
         fds.iter = iter.deepCopy(fds.env);
       }
     }
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      this.iflag = flag;
+    }
     
   }
   

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
index 43cf3c1..b696ff4 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/InMemoryMap.java
@@ -384,15 +384,17 @@ public class InMemoryMap {
     private FileSKVIterator reader;
     private MemoryDataSource parent;
     private IteratorEnvironment env;
+    private AtomicBoolean iflag;
     
     MemoryDataSource() {
-      this(null, false, null);
+      this(null, false, null, null);
     }
     
-    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env) {
+    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env, AtomicBoolean iflag) {
       this.parent = parent;
       this.switched = switched;
       this.env = env;
+      this.iflag = iflag;
     }
     
     @Override
@@ -428,6 +430,8 @@ public class InMemoryMap {
         FileSystem fs = TraceFileSystem.wrap(FileSystem.getLocal(conf));
         
         reader = new RFileOperations().openReader(memDumpFile, true, fs, conf, ServerConfiguration.getSiteConfiguration());
+        if (iflag != null)
+          reader.setInterruptFlag(iflag);
       }
 
       return reader;
@@ -436,9 +440,11 @@ public class InMemoryMap {
     @Override
     public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
       if (iter == null)
-        if (!switched)
+        if (!switched) {
           iter = map.skvIterator();
-        else {
+          if (iflag != null)
+            iter.setInterruptFlag(iflag);
+        } else {
           if (parent == null)
             iter = new MemKeyConversionIterator(getReader());
           else
@@ -454,7 +460,12 @@ public class InMemoryMap {
     
     @Override
     public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
-      return new MemoryDataSource(parent == null ? this : parent, switched, env);
+      return new MemoryDataSource(parent == null ? this : parent, switched, env, iflag);
+    }
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      this.iflag = flag;
     }
     
   }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
index a1fc707..bb13ff8 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/Tablet.java
@@ -2139,6 +2139,11 @@ public class Tablet {
     public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
       throw new UnsupportedOperationException();
     }
+
+    @Override
+    public void setInterruptFlag(AtomicBoolean flag) {
+      throw new UnsupportedOperationException();
+    }
     
   }
   

http://git-wip-us.apache.org/repos/asf/accumulo/blob/7699e1f4/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
----------------------------------------------------------------------
diff --git a/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java b/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
index c905bb8..683adf4 100644
--- a/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
+++ b/server/src/test/java/org/apache/accumulo/server/tabletserver/InMemoryMapTest.java
@@ -23,6 +23,7 @@ import java.util.List;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import junit.framework.TestCase;
 
@@ -32,6 +33,7 @@ import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IterationInterruptedException;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
 import org.apache.accumulo.core.util.LocalityGroupUtil;
@@ -237,7 +239,7 @@ public class InMemoryMapTest extends TestCase {
     ski1.close();
   }
   
-  private void deepCopyAndDelete(int interleaving) throws Exception {
+  private void deepCopyAndDelete(int interleaving, boolean interrupt) throws Exception {
     // interleaving == 0 intentionally omitted, this runs the test w/o deleting in mem map
 
     InMemoryMap imm = new InMemoryMap(false, System.getProperty("user.dir") + "/target");
@@ -247,37 +249,61 @@ public class InMemoryMapTest extends TestCase {
     
     MemoryIterator ski1 = imm.skvIterator();
     
-    if (interleaving == 1)
+    AtomicBoolean iflag = new AtomicBoolean(false);
+    ski1.setInterruptFlag(iflag);
+
+    if (interleaving == 1) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
     
     SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(null);
 
-    if (interleaving == 2)
+    if (interleaving == 2) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
 
     dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
     ski1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 
-    if (interleaving == 3)
+    if (interleaving == 3) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
 
     ae(dc, "r1", "foo:cq1", 3, "bar1");
     ae(ski1, "r1", "foo:cq1", 3, "bar1");
     dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
 
-    if (interleaving == 4)
+    if (interleaving == 4) {
       imm.delete(0);
+      if (interrupt)
+        iflag.set(true);
+    }
 
     ae(ski1, "r1", "foo:cq2", 3, "bar2");
     ae(dc, "r1", "foo:cq1", 3, "bar1");
     ae(dc, "r1", "foo:cq2", 3, "bar2");
     assertFalse(dc.hasTop());
     assertFalse(ski1.hasTop());
+
+    if (interrupt)
+      dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
   }
 
   public void testDeepCopyAndDelete() throws Exception {
     for (int i = 0; i <= 4; i++)
-      deepCopyAndDelete(i);
+      deepCopyAndDelete(i, false);
+
+    for (int i = 1; i <= 4; i++)
+      try {
+        deepCopyAndDelete(i, true);
+        fail("i = " + i);
+      } catch (IterationInterruptedException iie) {}
   }
 
   public void testBug1() throws Exception {