You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ec...@apache.org on 2011/12/05 17:27:56 UTC

svn commit: r1210518 - /incubator/accumulo/branches/1.3/src/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java

Author: ecn
Date: Mon Dec  5 16:27:55 2011
New Revision: 1210518

URL: http://svn.apache.org/viewvc?rev=1210518&view=rev
Log:
ACCUMULO-201: adding tools used to recover from OOM condition during a major compaction

Added:
    incubator/accumulo/branches/1.3/src/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java   (with props)

Added: incubator/accumulo/branches/1.3/src/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.3/src/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java?rev=1210518&view=auto
==============================================================================
--- incubator/accumulo/branches/1.3/src/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java (added)
+++ incubator/accumulo/branches/1.3/src/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java Mon Dec  5 16:27:55 2011
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.file.rfile;
+
+import java.util.ArrayList;
+
+import org.apache.accumulo.core.conf.DefaultConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile;
+import org.apache.accumulo.core.file.rfile.RFile.Reader;
+import org.apache.accumulo.core.file.rfile.RFile.Writer;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Command-line tool that splits an RFile into two RFiles according to the size of each key/value pair.
+ * 
+ * <p>
+ * For every input file <code>name.rf</code> given on the command line, two files are written next to it:
+ * <code>name_small.rf</code> receives every entry whose key size plus value size is strictly less than the
+ * threshold, and <code>name_large.rf</code> receives all remaining entries (including entries exactly at the
+ * threshold). The threshold defaults to 10 MiB and can be overridden with <code>-m &lt;bytes&gt;</code>.
+ */
+public class SplitLarge {
+
+  /**
+   * Parses the command line and splits each named RFile.
+   * 
+   * @param args
+   *          optional <code>-m &lt;bytes&gt;</code> threshold followed by one or more paths ending in <code>.rf</code>
+   * @throws IllegalArgumentException
+   *           if an input path does not end with <code>.rf</code>
+   * @throws Exception
+   *           if option parsing fails or reading/writing any of the files fails
+   */
+  public static void main(String[] args) throws Exception {
+    Configuration conf = CachedConfiguration.getInstance();
+    FileSystem fs = FileSystem.get(conf);
+    long maxSize = 10 * 1024 * 1024;
+
+    Options opts = new Options();
+    Option maxSizeOption = new Option("m", "", true, "the maximum size of the key/value pair to shunt to the small file");
+    opts.addOption(maxSizeOption);
+
+    CommandLine commandLine = new BasicParser().parse(opts, args);
+    if (commandLine.hasOption(maxSizeOption.getOpt())) {
+      maxSize = Long.parseLong(commandLine.getOptionValue(maxSizeOption.getOpt()));
+    }
+
+    // The block size comes from the default configuration and is the same for every file, so look it up once.
+    int blockSize = (int) DefaultConfiguration.getDefaultConfiguration().getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE);
+
+    for (String arg : commandLine.getArgs()) {
+      splitFile(fs, conf, arg, maxSize, blockSize);
+    }
+  }
+
+  /**
+   * Splits a single RFile into its <code>_small.rf</code> and <code>_large.rf</code> companions.
+   * 
+   * @param fs
+   *          file system used to open the input and create the outputs
+   * @param conf
+   *          Hadoop configuration handed to the block file reader and writers
+   * @param arg
+   *          path of the input file; must end with <code>.rf</code>
+   * @param maxSize
+   *          entries whose key size plus value size is strictly below this many bytes go to the small file
+   * @param blockSize
+   *          block size passed to both output writers
+   * @throws IllegalArgumentException
+   *           if <code>arg</code> does not end with <code>.rf</code>
+   * @throws Exception
+   *           if reading the input or writing either output fails
+   */
+  private static void splitFile(FileSystem fs, Configuration conf, String arg, long maxSize, int blockSize) throws Exception {
+    // Reject a bad name before any file is opened so that nothing needs to be cleaned up.
+    if (!arg.endsWith(".rf")) {
+      throw new IllegalArgumentException("File must end with .rf");
+    }
+    String baseName = arg.substring(0, arg.length() - 3);
+    String smallName = baseName + "_small.rf";
+    String largeName = baseName + "_large.rf";
+
+    Reader iter = new RFile.Reader(new CachableBlockFile.Reader(fs, new Path(arg), conf, null, null));
+    try {
+      Writer small = new RFile.Writer(new CachableBlockFile.Writer(fs, new Path(smallName), "gz", conf), blockSize);
+      try {
+        small.startDefaultLocalityGroup();
+        Writer large = new RFile.Writer(new CachableBlockFile.Writer(fs, new Path(largeName), "gz", conf), blockSize);
+        try {
+          large.startDefaultLocalityGroup();
+
+          // An unbounded range with no column family filter: visit every entry in the file.
+          iter.seek(new Range(), new ArrayList<ByteSequence>(), false);
+          while (iter.hasTop()) {
+            Key key = iter.getTopKey();
+            Value value = iter.getTopValue();
+            if (key.getSize() + value.getSize() < maxSize) {
+              small.append(key, value);
+            } else {
+              large.append(key, value);
+            }
+            iter.next();
+          }
+        } finally {
+          large.close();
+        }
+      } finally {
+        small.close();
+      }
+    } finally {
+      // Always release the input, even when writing one of the outputs failed.
+      iter.close();
+    }
+  }
+
+}

Propchange: incubator/accumulo/branches/1.3/src/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java
------------------------------------------------------------------------------
    svn:eol-style = native