You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2007/12/21 22:58:27 UTC
svn commit: r606332 - in /lucene/hadoop/trunk/src/contrib/hbase: ./
src/java/org/apache/hadoop/hbase/ src/test/org/apache/hadoop/hbase/
Author: stack
Date: Fri Dec 21 13:58:25 2007
New Revision: 606332
URL: http://svn.apache.org/viewvc?rev=606332&view=rev
Log:
HADOOP-2479 Save on number of Text object creations
Modified:
lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java
lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java
Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=606332&r1=606331&r2=606332&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Fri Dec 21 13:58:25 2007
@@ -19,6 +19,7 @@
(Bryan Duxbury via Stack)
OPTIMIZATIONS
+ HADOOP-2479 Save on number of Text object creations
BUG FIXES
HADOOP-2059 In tests, exceptions in min dfs shutdown should not fail test
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java?rev=606332&r1=606331&r2=606332&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java Fri Dec 21 13:58:25 2007
@@ -64,7 +64,7 @@
private static class ColumnMatcher {
private boolean wildCardmatch;
private MATCH_TYPE matchType;
- private String family;
+ private Text family;
private Pattern columnMatcher;
private Text col;
@@ -73,7 +73,7 @@
try {
if(qualifier == null || qualifier.getLength() == 0) {
this.matchType = MATCH_TYPE.FAMILY_ONLY;
- this.family = HStoreKey.extractFamily(col).toString();
+ this.family = HStoreKey.extractFamily(col).toText();
this.wildCardmatch = true;
} else if(isRegexPattern.matcher(qualifier.toString()).matches()) {
this.matchType = MATCH_TYPE.REGEX;
@@ -93,13 +93,10 @@
boolean matches(Text c) throws IOException {
if(this.matchType == MATCH_TYPE.SIMPLE) {
return c.equals(this.col);
-
} else if(this.matchType == MATCH_TYPE.FAMILY_ONLY) {
- return HStoreKey.extractFamily(c).toString().equals(this.family);
-
+ return HStoreKey.extractFamily(c).equals(this.family);
} else if(this.matchType == MATCH_TYPE.REGEX) {
return this.columnMatcher.matcher(c.toString()).matches();
-
} else {
throw new IOException("Invalid match type: " + this.matchType);
}
@@ -130,7 +127,7 @@
this.multipleMatchers = false;
this.okCols = new TreeMap<Text, Vector<ColumnMatcher>>();
for(int i = 0; i < targetCols.length; i++) {
- Text family = HStoreKey.extractFamily(targetCols[i]);
+ Text family = HStoreKey.extractFamily(targetCols[i]).toText();
Vector<ColumnMatcher> matchers = okCols.get(family);
if(matchers == null) {
matchers = new Vector<ColumnMatcher>();
@@ -160,8 +157,8 @@
*/
boolean columnMatch(int i) throws IOException {
Text column = keys[i].getColumn();
- Text family = HStoreKey.extractFamily(column);
- Vector<ColumnMatcher> matchers = okCols.get(family);
+ Vector<ColumnMatcher> matchers =
+ okCols.get(HStoreKey.extractFamily(column));
if(matchers == null) {
return false;
}
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java?rev=606332&r1=606331&r2=606332&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java Fri Dec 21 13:58:25 2007
@@ -283,7 +283,7 @@
long maxSeqId = -1;
for(Map.Entry<Text, HColumnDescriptor> e :
this.regionInfo.getTableDesc().families().entrySet()) {
- Text colFamily = HStoreKey.extractFamily(e.getKey());
+ Text colFamily = HStoreKey.extractFamily(e.getKey()).toText();
HStore store = new HStore(rootDir, this.regionInfo.getRegionName(),
this.encodedRegionName, e.getValue(), fs, oldLogFile, conf);
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java?rev=606332&r1=606331&r2=606332&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java Fri Dec 21 13:58:25 2007
@@ -44,6 +44,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.TextSequence;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
@@ -565,7 +566,7 @@
this.regionName = regionName;
this.encodedRegionName = encodedName;
this.family = family;
- this.familyName = HStoreKey.extractFamily(this.family.getName());
+ this.familyName = HStoreKey.extractFamily(this.family.getName()).toText();
this.compression = SequenceFile.CompressionType.NONE;
this.storeName = this.encodedRegionName + "/" + this.familyName.toString();
@@ -939,8 +940,8 @@
try {
for (Map.Entry<HStoreKey, byte []> es: cache.entrySet()) {
HStoreKey curkey = es.getKey();
- if (this.familyName.equals(HStoreKey.extractFamily(
- curkey.getColumn()))) {
+ TextSequence f = HStoreKey.extractFamily(curkey.getColumn());
+ if (f.equals(this.familyName)) {
out.append(curkey, new ImmutableBytesWritable(es.getValue()));
}
}
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java?rev=606332&r1=606331&r2=606332&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java Fri Dec 21 13:58:25 2007
@@ -19,6 +19,7 @@
*/
package org.apache.hadoop.hbase;
+import org.apache.hadoop.hbase.io.TextSequence;
import org.apache.hadoop.io.*;
import java.io.*;
@@ -27,94 +28,14 @@
* A Key for a stored row
*/
public class HStoreKey implements WritableComparable {
- public static final char COLUMN_FAMILY_DELIMITER = ':';
-
- // TODO: Move these utility methods elsewhere (To a Column class?).
- /**
- * Extracts the column family name from a column
- * For example, returns 'info' if the specified column was 'info:server'
- * @param col name of column
- * @return column family name
- * @throws InvalidColumnNameException
- */
- public static Text extractFamily(final Text col)
- throws InvalidColumnNameException {
- return extractFamily(col, false);
- }
-
/**
- * Extracts the column family name from a column
- * For example, returns 'info' if the specified column was 'info:server'
- * @param col name of column
- * @param withColon if returned family name should include the ':' suffix.
- * @return column family name
- * @throws InvalidColumnNameException
+ * Colon character in UTF-8
*/
- public static Text extractFamily(final Text col, final boolean withColon)
- throws InvalidColumnNameException {
- int offset = getColonOffset(col);
- // Include ':' in copy?
- offset += (withColon)? 1: 0;
- if (offset == col.getLength()) {
- return col;
- }
- byte [] buffer = new byte[offset];
- System.arraycopy(col.getBytes(), 0, buffer, 0, offset);
- return new Text(buffer);
- }
-
- /**
- * Extracts the column qualifier, the portion that follows the colon (':')
- * family/qualifier separator.
- * For example, returns 'server' if the specified column was 'info:server'
- * @param col name of column
- * @return column qualifier or null if there is no qualifier.
- * @throws InvalidColumnNameException
- */
- public static Text extractQualifier(final Text col)
- throws InvalidColumnNameException {
- int offset = getColonOffset(col);
- if (offset + 1 == col.getLength()) {
- return null;
- }
- int bufferLength = col.getLength() - (offset + 1);
- byte [] buffer = new byte[bufferLength];
- System.arraycopy(col.getBytes(), offset + 1, buffer, 0, bufferLength);
- return new Text(buffer);
- }
+ public static final char COLUMN_FAMILY_DELIMITER = ':';
- private static int getColonOffset(final Text col)
- throws InvalidColumnNameException {
- int offset = -1;
- for (int i = 0; i < col.getLength(); i++) {
- if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) {
- offset = i;
- break;
- }
- }
- if(offset < 0) {
- throw new InvalidColumnNameException(col + " is missing the colon " +
- "family/qualifier separator");
- }
- return offset;
- }
-
- /**
- * Returns row and column bytes out of an HStoreKey.
- * @param hsk Store key.
- * @return byte array encoding of HStoreKey
- * @throws UnsupportedEncodingException
- */
- public static byte[] getBytes(final HStoreKey hsk)
- throws UnsupportedEncodingException {
- StringBuilder s = new StringBuilder(hsk.getRow().toString());
- s.append(hsk.getColumn().toString());
- return s.toString().getBytes(HConstants.UTF8_ENCODING);
- }
-
- Text row;
- Text column;
- long timestamp;
+ private Text row;
+ private Text column;
+ private long timestamp;
/** Default constructor used in conjunction with Writable interface */
@@ -163,6 +84,7 @@
* @param timestamp timestamp value
*/
public HStoreKey(Text row, Text column, long timestamp) {
+ // Make copies by doing 'new Text(arg)'.
this.row = new Text(row);
this.column = new Text(column);
this.timestamp = timestamp;
@@ -338,5 +260,91 @@
row.readFields(in);
column.readFields(in);
timestamp = in.readLong();
+ }
+
+ // Statics
+ // TODO: Move these utility methods elsewhere (To a Column class?).
+
+ /**
+ * Extracts the column family name from a column
+ * For example, returns 'info' if the specified column was 'info:server'
+ * @param col name of column
+ * @return column family as a TextSequence based on the passed
+ * <code>col</code>. If <code>col</code> is reused, make a new Text of
+ * the result by calling {@link TextSequence#toText()}.
+ * @throws InvalidColumnNameException
+ */
+ public static TextSequence extractFamily(final Text col)
+ throws InvalidColumnNameException {
+ return extractFamily(col, false);
+ }
+
+ /**
+ * Extracts the column family name from a column
+ * For example, returns 'info' if the specified column was 'info:server'
+ * @param col name of column
+ * @return column family as a TextSequence based on the passed
+ * <code>col</code>. If <code>col</code> is reused, make a new Text of
+ * the result by calling {@link TextSequence#toText()}.
+ * @throws InvalidColumnNameException
+ */
+ public static TextSequence extractFamily(final Text col,
+ final boolean withColon)
+ throws InvalidColumnNameException {
+ int offset = getColonOffset(col);
+ // Include ':' in copy?
+ offset += (withColon)? 1: 0;
+ if (offset == col.getLength()) {
+ return new TextSequence(col);
+ }
+ return new TextSequence(col, 0, offset);
+ }
+
+ /**
+ * Extracts the column qualifier, the portion that follows the colon (':')
+ * family/qualifier separator.
+ * For example, returns 'server' if the specified column was 'info:server'
+ * @param col name of column
+ * @return column qualifier as a TextSequence based on the passed
+ * <code>col</code>. If <code>col</code> is reused, make a new Text of
+ * the result by calling {@link TextSequence#toText()}.
+ * @throws InvalidColumnNameException
+ */
+ public static TextSequence extractQualifier(final Text col)
+ throws InvalidColumnNameException {
+ int offset = getColonOffset(col);
+ if (offset + 1 == col.getLength()) {
+ return null;
+ }
+ return new TextSequence(col, offset + 1);
+ }
+
+ private static int getColonOffset(final Text col)
+ throws InvalidColumnNameException {
+ int offset = -1;
+ for (int i = 0; i < col.getLength(); i++) {
+ if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) {
+ offset = i;
+ break;
+ }
+ }
+ if(offset < 0) {
+ throw new InvalidColumnNameException(col + " is missing the colon " +
+ "family/qualifier separator");
+ }
+ return offset;
+ }
+
+ /**
+ * Returns row and column bytes out of an HStoreKey.
+ * @param hsk Store key.
+ * @return byte array encoding of HStoreKey
+ * @throws UnsupportedEncodingException
+ */
+ public static byte[] getBytes(final HStoreKey hsk)
+ throws UnsupportedEncodingException {
+ StringBuilder s = new StringBuilder(hsk.getRow().toString());
+ s.append(hsk.getColumn().toString());
+ return s.toString().getBytes(HConstants.UTF8_ENCODING);
}
}
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java?rev=606332&r1=606331&r2=606332&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java Fri Dec 21 13:58:25 2007
@@ -21,7 +21,6 @@
import java.io.IOException;
import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
@@ -472,25 +471,28 @@
}
}
+ /*
+ * Format passed integer.
+ * This method takes some time and is done inline uploading data. For
+ * example, doing the mapfile test, generation of the key and value
+ * consumes about 30% of CPU time.
+ * @param i
+ * @return Integer as String zero padded.
+ */
static Text format(final int i) {
return new Text(String.format("%010d", Integer.valueOf(i)));
}
/*
+ * This method takes some time and is done inline uploading data. For
+ * example, doing the mapfile test, generation of the key and value
+ * consumes about 30% of CPU time.
* @return Generated random value to insert into a table cell.
*/
static byte[] generateValue(final Random r) {
- StringBuilder val = new StringBuilder();
- while(val.length() < ROW_LENGTH) {
- val.append(Long.toString(r.nextLong()));
- }
- byte[] value = null;
- try {
- value = val.toString().getBytes(HConstants.UTF8_ENCODING);
- } catch (UnsupportedEncodingException e) {
- assert(false);
- }
- return value;
+ byte [] b = new byte [ROW_LENGTH];
+ r.nextBytes(b);
+ return b;
}
static Text getRandomRow(final Random random, final int totalRows) {
@@ -556,7 +558,7 @@
Random random = new Random();
Configuration c = new Configuration();
FileSystem fs = FileSystem.get(c);
- Path mf = new Path("performanceevaluation.mapfile");
+ Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
if (fs.exists(mf)) {
fs.delete(mf);
}
@@ -571,7 +573,9 @@
}
writer.close();
LOG.info("Writing " + ROW_COUNT + " records took " +
- (System.currentTimeMillis() - startTime) + "ms");
+ (System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " +
+ "and values is done inline and has been seen to consume " +
+ "significant time: e.g. ~30% of cpu time");
// Do random reads.
LOG.info("Reading " + ROW_COUNT + " random rows");
MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c);
@@ -585,7 +589,9 @@
}
reader.close();
LOG.info("Reading " + ROW_COUNT + " random records took " +
- (System.currentTimeMillis() - startTime) + "ms");
+ (System.currentTimeMillis() - startTime) + "ms (Note: generation of " +
+ "random key is done in line and takes a significant amount of cpu " +
+ "time: e.g 10-15%");
// Do random reads.
LOG.info("Reading " + ROW_COUNT + " rows sequentially");
reader = new MapFile.Reader(fs, mf.toString(), c);
@@ -599,7 +605,7 @@
LOG.info("Reading " + ROW_COUNT + " records serially took " +
(System.currentTimeMillis() - startTime) + "ms");
}
-
+
private void runTest(final String cmd) throws IOException {
if (cmd.equals(RANDOM_READ_MEM)) {
// For this one test, so all fits in memory, make R smaller (See