You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by el...@apache.org on 2013/10/29 06:06:30 UTC

git commit: ACCUMULO-1773 Review and minor changes to GarbageCollection rework.

Updated Branches:
  refs/heads/master 26a8f5f52 -> 72fdef3d5


ACCUMULO-1773 Review and minor changes to GarbageCollection rework.

Add shortcircuit when scanning candidates for any files which might be in a
bulk load. Add javadoc to the GarbageCollectionEnvironment to actually describe
what the interface should be doing. Remove old 'offline' warning message
as the option itself is gone. Added TODO about caught exception when
trying to write deletes. Removed unnecessary string concat in
ChooseDir.


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/72fdef3d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/72fdef3d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/72fdef3d

Branch: refs/heads/master
Commit: 72fdef3d589b9122b562bf8c89a4f22535f460d0
Parents: 26a8f5f
Author: Josh Elser <el...@apache.org>
Authored: Tue Oct 29 00:59:43 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Tue Oct 29 00:59:43 2013 -0400

----------------------------------------------------------------------
 .../server/gc/GarbageCollectionAlgorithm.java   |  5 ++-
 .../server/gc/GarbageCollectionEnvironment.java | 47 ++++++++++++++++++++
 .../server/gc/SimpleGarbageCollector.java       |  8 ++--
 .../server/master/tableOps/CreateTable.java     |  3 +-
 .../server/gc/GarbageCollectionTest.java        | 11 +++++
 5 files changed, 67 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/72fdef3d/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionAlgorithm.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionAlgorithm.java b/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionAlgorithm.java
index 04d4c5d..02018bc 100644
--- a/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionAlgorithm.java
+++ b/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionAlgorithm.java
@@ -122,8 +122,9 @@ public class GarbageCollectionAlgorithm {
   protected void confirmDeletes(GarbageCollectionEnvironment gce, SortedMap<String,String> candidateMap) throws TableNotFoundException, AccumuloException,
       AccumuloSecurityException {
     boolean checkForBulkProcessingFiles = false;
-    for (String candidate : candidateMap.keySet())
-      checkForBulkProcessingFiles |= candidate.toLowerCase(Locale.ENGLISH).contains(Constants.BULK_PREFIX);
+    Iterator<String> relativePaths = candidateMap.keySet().iterator();
+    while (!checkForBulkProcessingFiles && relativePaths.hasNext())
+      checkForBulkProcessingFiles |= relativePaths.next().toLowerCase(Locale.ENGLISH).contains(Constants.BULK_PREFIX);
 
     if (checkForBulkProcessingFiles) {
       Iterator<String> blipiter = gce.getBlipIterator();

http://git-wip-us.apache.org/repos/asf/accumulo/blob/72fdef3d/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionEnvironment.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionEnvironment.java b/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionEnvironment.java
index 2b939dd..fa35ed3 100644
--- a/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionEnvironment.java
+++ b/server/src/main/java/org/apache/accumulo/server/gc/GarbageCollectionEnvironment.java
@@ -28,25 +28,72 @@ import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily;
 
 /**
  * 
  */
 public interface GarbageCollectionEnvironment {
 
+  /**
+   * Return a list of paths to files which are candidates for deletion from a given table, {@link RootTable.NAME} or {@link MetadataTable.NAME}
+   * @param continuePoint A row to resume from if a previous invocation was stopped due to finding an extremely large number
+   *            of candidates to remove which would have exceeded memory limitations
+   * @return A collection of candidates files for deletion, may not be the complete collection of files for deletion at this point in time
+   * @throws TableNotFoundException
+   * @throws AccumuloException
+   * @throws AccumuloSecurityException
+   */
   List<String> getCandidates(String continuePoint) throws TableNotFoundException, AccumuloException, AccumuloSecurityException;
 
+  /**
+   * Fetch a list of paths for all bulk loads in progress (blip) from a given table, {@link RootTable.NAME} or {@link MetadataTable.NAME}
+   * @return The list of files for each bulk load currently in progress.
+   * @throws TableNotFoundException
+   * @throws AccumuloException
+   * @throws AccumuloSecurityException
+   */
   Iterator<String> getBlipIterator() throws TableNotFoundException, AccumuloException, AccumuloSecurityException;
 
+  /**
+   * Fetches the references to files, {@link DataFileColumnFamily.NAME} or {@link ScanFileColumnFamily.NAME}, from tablets
+   * @return An Iterator to the @{link Entry<Key,Value>}s which constitute a reference to a file.
+   * @throws TableNotFoundException
+   * @throws AccumuloException
+   * @throws AccumuloSecurityException
+   */
   Iterator<Entry<Key,Value>> getReferenceIterator() throws TableNotFoundException, AccumuloException, AccumuloSecurityException;
 
+  /**
+   * Return the set of tableIDs for the given instance this GarbageCollector is running over
+   * @return The valueSet for the table name to table id map.
+   */
   Set<String> getTableIDs();
 
+  /**
+   * Delete the given files from the provided {@link Map} of relative path to absolute path for each file that should be deleted
+   * @param candidateMap A Map from relative path to absolute path for files to be deleted.
+   * @throws IOException
+   */
   void delete(SortedMap<String,String> candidateMap) throws IOException;
 
+  /**
+   * Delete a table's directory if it is empty.
+   * @param tableID The id of the table whose directory we are to operate on
+   * @throws IOException
+   */
   void deleteTableDirIfEmpty(String tableID) throws IOException;
 
+  /**
+   * Increment the number of candidates for deletion for the current garbage collection run
+   * @param i Value to increment the deletion candidates by
+   */
   void incrementCandidatesStat(long i);
 
+  /**
+   * Increment the number of files still in use for the current garbage collection run
+   * @param i Value to increment the still-in-use count by.
+   */
   void incrementInUseStat(long i);
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/72fdef3d/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java b/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java
index b598ec1..2619a1c 100644
--- a/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java
+++ b/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java
@@ -243,10 +243,7 @@ public class SimpleGarbageCollector implements Iface {
       if (opts.safeMode) {
         if (opts.verbose)
           System.out.println("SAFEMODE: There are " + confirmedDeletes.size() + " data file candidates marked for deletion.%n"
-              + "          Examine the log files to identify them.%n" + "          They can be removed by executing: bin/accumulo gc --offline%n"
-              + "WARNING:  Do not run the garbage collector in offline mode unless you are positive%n"
-              + "          that the accumulo METADATA table is in a clean state, or that accumulo%n"
-              + "          has not yet been run, in the case of an upgrade.");
+              + "          Examine the log files to identify them.%n");
         log.info("SAFEMODE: Listing all data file candidates for deletion");
         for (String s : confirmedDeletes.values())
           log.info("SAFEMODE: " + s);
@@ -263,10 +260,13 @@ public class SimpleGarbageCollector implements Iface {
         writer = c.createBatchWriter(tableName, new BatchWriterConfig());
       } catch (AccumuloException e) {
         log.error("Unable to connect to Accumulo to write deletes", e);
+        // TODO Throw exception or return?
       } catch (AccumuloSecurityException e) {
         log.error("Unable to connect to Accumulo to write deletes", e);
+        // TODO Throw exception or return?
       } catch (TableNotFoundException e) {
         log.error("Unable to create writer to remove file from the " + e.getTableName() + " table", e);
+        // TODO Throw exception or return?
       }
 
       // when deleting a dir and all files in that dir, only need to delete the dir

http://git-wip-us.apache.org/repos/asf/accumulo/blob/72fdef3d/server/src/main/java/org/apache/accumulo/server/master/tableOps/CreateTable.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/master/tableOps/CreateTable.java b/server/src/main/java/org/apache/accumulo/server/master/tableOps/CreateTable.java
index 23b3760..8bd2c3d 100644
--- a/server/src/main/java/org/apache/accumulo/server/master/tableOps/CreateTable.java
+++ b/server/src/main/java/org/apache/accumulo/server/master/tableOps/CreateTable.java
@@ -173,7 +173,8 @@ class ChooseDir extends MasterRepo {
 
   @Override
   public Repo<Master> call(long tid, Master master) throws Exception {
-    tableInfo.dir = master.getFileSystem().choose(ServerConstants.getTablesDirs()) + "/" + tableInfo.tableId + "" + Constants.DEFAULT_TABLET_LOCATION;
+    // Constants.DEFAULT_TABLET_LOCATION has a leading slash prepended to it so we don't need to add one here
+    tableInfo.dir = master.getFileSystem().choose(ServerConstants.getTablesDirs()) + "/" + tableInfo.tableId + Constants.DEFAULT_TABLET_LOCATION;
     return new CreateDir(tableInfo);
   }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/72fdef3d/server/src/test/java/org/apache/accumulo/server/gc/GarbageCollectionTest.java
----------------------------------------------------------------------
diff --git a/server/src/test/java/org/apache/accumulo/server/gc/GarbageCollectionTest.java b/server/src/test/java/org/apache/accumulo/server/gc/GarbageCollectionTest.java
index 2f3f16e..6dad4de 100644
--- a/server/src/test/java/org/apache/accumulo/server/gc/GarbageCollectionTest.java
+++ b/server/src/test/java/org/apache/accumulo/server/gc/GarbageCollectionTest.java
@@ -160,18 +160,22 @@ public class GarbageCollectionTest {
     gca.collect(gce);
     assertRemoved(gce);
 
+    // Remove the reference to this flush file, run the GC which should not trim it from the candidates, and assert that it's gone
     gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf");
     gca.collect(gce);
     assertRemoved(gce, "hdfs://foo:6000/accumulo/tables/4/t0/F000.rf");
 
+    // Removing a reference to a file that wasn't in the candidates should do nothing
     gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf");
     gca.collect(gce);
     assertRemoved(gce);
 
+    // Remove the reference to a file in the candidates should cause it to be removed
     gce.removeFileReference("4", null, "hdfs://foo:6000/accumulo/tables/4/t0/F001.rf");
     gca.collect(gce);
     assertRemoved(gce, "hdfs://foo.com:6000/accumulo/tables/4/t0/F001.rf");
 
+    // Adding more candidates which do no have references should be removed
     gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf");
     gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf");
     gca.collect(gce);
@@ -195,6 +199,7 @@ public class GarbageCollectionTest {
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
+    // All candidates currently have references
     gca.collect(gce);
     assertRemoved(gce);
 
@@ -249,16 +254,20 @@ public class GarbageCollectionTest {
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
+    // Nothing should be removed because all candidates exist within a blip
     gca.collect(gce);
     assertRemoved(gce);
 
+    // Remove the first blip
     gce.blips.remove("/4/b-0");
 
+    // And we should lose all files in that blip and the blip directory itself -- relative and absolute 
     gca.collect(gce);
     assertRemoved(gce, "/4/b-0", "/4/b-0/F002.rf", "hdfs://foo.com:6000/accumulo/tables/4/b-0/F001.rf");
 
     gce.blips.remove("hdfs://foo.com:6000/accumulo/tables/5/b-0");
 
+    // Same as above, we should lose relative and absolute for a relative or absolute blip
     gca.collect(gce);
     assertRemoved(gce, "/5/b-0", "/5/b-0/F002.rf", "hdfs://foo.com:6000/accumulo/tables/5/b-0/F001.rf");
 
@@ -298,9 +307,11 @@ public class GarbageCollectionTest {
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
+    // A directory reference does not preclude a candidate file beneath that directory from deletion
     gca.collect(gce);
     assertRemoved(gce, "/4/t-0/F002.rf");
 
+    // Removing the dir reference for a table will delete all tablet directories
     gce.removeDirReference("5", null);
     gca.collect(gce);
     assertRemoved(gce, "hdfs://foo.com:6000/accumulo/tables/5/t-0");