Posted to commits@hbase.apache.org by st...@apache.org on 2008/07/17 09:17:28 UTC
svn commit: r677517 [2/6] - in /hadoop/hbase/trunk: ./
src/java/org/apache/hadoop/hbase/ src/java/org/apache/hadoop/hbase/client/
src/java/org/apache/hadoop/hbase/ipc/
src/java/org/apache/hadoop/hbase/master/
src/java/org/apache/hadoop/hbase/regionserv...
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HColumnDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HColumnDescriptor.java?rev=677517&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HColumnDescriptor.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HColumnDescriptor.java Thu Jul 17 00:17:26 2008
@@ -0,0 +1,449 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util.migration.v5;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.BloomFilterDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+
+/**
+ * An HColumnDescriptor contains information about a column family such as the
+ * number of versions, compression settings, etc.
+ *
+ * It is used as input when creating a table or adding a column. Once set, the
+ * parameters that specify a column cannot be changed without deleting the
+ * column and recreating it. If there is data stored in the column, it will be
+ * deleted when the column is deleted.
+ */
+public class HColumnDescriptor implements WritableComparable {
+ // For future backward compatibility
+
+ // Version 3 was when column names became byte arrays and when we picked up
+ // the time-to-live feature.
+ // Version 4 was when bloom filter descriptors were removed.
+ private static final byte COLUMN_DESCRIPTOR_VERSION = (byte)4;
+
+ /**
+ * The type of compression.
+ * @see org.apache.hadoop.io.SequenceFile.Writer
+ */
+ public static enum CompressionType {
+ /** Do not compress records. */
+ NONE,
+ /** Compress values only, each separately. */
+ RECORD,
+ /** Compress sequences of records together in blocks. */
+ BLOCK
+ }
+
+ // Defines for jruby/shell
+ public static final String COMPRESSION = "COMPRESSION";
+ public static final String IN_MEMORY = "IN_MEMORY";
+ public static final String BLOCKCACHE = "BLOCKCACHE";
+ public static final String LENGTH = "LENGTH";
+ public static final String TTL = "TTL";
+ public static final String BLOOMFILTER = "BLOOMFILTER";
+ public static final String FOREVER = "FOREVER";
+
+ /**
+ * Default compression type.
+ */
+ public static final CompressionType DEFAULT_COMPRESSION =
+ CompressionType.NONE;
+
+ /**
+ * Default number of versions of a record to keep.
+ */
+ public static final int DEFAULT_VERSIONS = 3;
+
+ /**
+ * Default maximum cell length.
+ */
+ public static final int DEFAULT_LENGTH = Integer.MAX_VALUE;
+
+ /**
+ * Default setting for whether to serve from memory or not.
+ */
+ public static final boolean DEFAULT_IN_MEMORY = false;
+
+ /**
+ * Default setting for whether to use a block cache or not.
+ */
+ public static final boolean DEFAULT_BLOCKCACHE = false;
+
+ /**
+ * Default setting for whether or not to use bloomfilters.
+ */
+ public static final boolean DEFAULT_BLOOMFILTER = false;
+
+ /**
+ * Default time to live of cell contents.
+ */
+ public static final int DEFAULT_TTL = HConstants.FOREVER;
+
+ // Column family name
+ private byte [] name;
+ // Number of versions to keep
+ private int maxVersions = DEFAULT_VERSIONS;
+ // Compression setting if any
+ private CompressionType compressionType = DEFAULT_COMPRESSION;
+ // Serve reads from in-memory cache
+ private boolean inMemory = DEFAULT_IN_MEMORY;
+ // Serve reads from in-memory block cache
+ private boolean blockCacheEnabled = DEFAULT_BLOCKCACHE;
+ // Maximum value size
+ private int maxValueLength = DEFAULT_LENGTH;
+ // Time to live of cell contents, in seconds from last timestamp
+ private int timeToLive = DEFAULT_TTL;
+ // True if bloom filter was specified
+ private boolean bloomFilter = false;
+
+ /**
+ * Default constructor. Must be present for Writable.
+ */
+ public HColumnDescriptor() {
+ this.name = null;
+ }
+
+ /**
+ * Construct a column descriptor specifying only the family name.
+ * The other attributes are defaulted.
+ *
+ * @param columnName - column family name
+ */
+ public HColumnDescriptor(final String columnName) {
+ this(Bytes.toBytes(columnName));
+ }
+
+ /**
+ * Construct a column descriptor specifying only the family name.
+ * The other attributes are defaulted.
+ *
+ * @param columnName - column family name
+ */
+ public HColumnDescriptor(final Text columnName) {
+ this(columnName.getBytes());
+ }
+
+ /**
+ * Construct a column descriptor specifying only the family name.
+ * The other attributes are defaulted.
+ *
+ * @param columnName Column family name. Must have the ':' ending.
+ */
+ public HColumnDescriptor(final byte [] columnName) {
+ this (columnName == null || columnName.length <= 0?
+ HConstants.EMPTY_BYTE_ARRAY: columnName, DEFAULT_VERSIONS,
+ DEFAULT_COMPRESSION, DEFAULT_IN_MEMORY, DEFAULT_BLOCKCACHE,
+ Integer.MAX_VALUE, DEFAULT_TTL, false);
+ }
+
+ /**
+ * Constructor
+ * @param columnName Column family name. Must have the ':' ending.
+ * @param maxVersions Maximum number of versions to keep
+ * @param compression Compression type
+ * @param inMemory If true, column data should be kept in an HRegionServer's
+ * cache
+ * @param blockCacheEnabled If true, MapFile blocks should be cached
+ * @param maxValueLength Restrict values to <= this value
+ * @param timeToLive Time-to-live of cell contents, in seconds from last timestamp
+ * (use HConstants.FOREVER for unlimited TTL)
+ * @param bloomFilter Enable the specified bloom filter for this column
+ *
+ * @throws IllegalArgumentException if passed a family name that is made of
+ * other than 'word' characters: i.e. <code>[a-zA-Z_0-9.]</code>, or that does
+ * not end in a <code>:</code>
+ * @throws IllegalArgumentException if the number of versions is <= 0
+ */
+ public HColumnDescriptor(final byte [] columnName, final int maxVersions,
+ final CompressionType compression, final boolean inMemory,
+ final boolean blockCacheEnabled, final int maxValueLength,
+ final int timeToLive, final boolean bloomFilter) {
+ isLegalFamilyName(columnName);
+ this.name = stripColon(columnName);
+ if (maxVersions <= 0) {
+ // TODO: Allow maxVersion of 0 to be the way you say "Keep all versions".
+ // Until there is support, consider 0 or < 0 -- a configuration error.
+ throw new IllegalArgumentException("Maximum versions must be positive");
+ }
+ this.maxVersions = maxVersions;
+ this.inMemory = inMemory;
+ this.blockCacheEnabled = blockCacheEnabled;
+ this.maxValueLength = maxValueLength;
+ this.timeToLive = timeToLive;
+ this.bloomFilter = bloomFilter;
+ this.compressionType = compression;
+ }
+
+ private static byte [] stripColon(final byte [] n) {
+ byte [] result = new byte [n.length - 1];
+ // Have the stored family name be absent the colon delimiter
+ System.arraycopy(n, 0, result, 0, n.length - 1);
+ return result;
+ }
+
+ /**
+ * @param b Family name.
+ * @return <code>b</code>
+ * @throws IllegalArgumentException If not null and not a legitimate family
+ * name: i.e. 'printable' and ending in a ':' (passing null is allowed because
+ * <code>b</code> can be null when deserializing).
+ */
+ public static byte [] isLegalFamilyName(final byte [] b) {
+ if (b == null) {
+ return b;
+ }
+ if (b[b.length - 1] != ':') {
+ throw new IllegalArgumentException("Family names must end in a colon: " +
+ Bytes.toString(b));
+ }
+ for (int i = 0; i < (b.length - 1); i++) {
+ if (Character.isLetterOrDigit(b[i]) || b[i] == '_' || b[i] == '.') {
+ continue;
+ }
+ throw new IllegalArgumentException("Illegal character <" + b[i] +
. Family names">
+ ">. Family names can only contain 'word characters' and must end " +
+ "with a colon: " + Bytes.toString(b));
+ }
+ return b;
+ }
+
+ /**
+ * @return Name of this column family
+ */
+ public byte [] getName() {
+ return name;
+ }
+
+ /**
+ * @return Name of this column family
+ */
+ public String getNameAsString() {
+ return Bytes.toString(this.name);
+ }
+
+ /** @return compression type being used for the column family */
+ public CompressionType getCompression() {
+ return this.compressionType;
+ }
+
+ /** @return maximum number of versions */
+ public int getMaxVersions() {
+ return this.maxVersions;
+ }
+
+ /**
+ * @return Compression type setting.
+ */
+ public CompressionType getCompressionType() {
+ return this.compressionType;
+ }
+
+ /**
+ * @return True if column contents are to be kept in the HRegionServer cache.
+ */
+ public boolean isInMemory() {
+ return this.inMemory;
+ }
+
+ /**
+ * @return Maximum value length.
+ */
+ public int getMaxValueLength() {
+ return this.maxValueLength;
+ }
+
+ /**
+ * @return Time to live.
+ */
+ public int getTimeToLive() {
+ return this.timeToLive;
+ }
+
+ /**
+ * @return True if MapFile blocks should be cached.
+ */
+ public boolean isBlockCacheEnabled() {
+ return blockCacheEnabled;
+ }
+
+ /**
+ * @return true if a bloom filter is enabled
+ */
+ public boolean isBloomFilterEnabled() {
+ return this.bloomFilter;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public String toString() {
+ return "{" + HConstants.NAME + " => '" + Bytes.toString(name) +
+ "', " + HConstants.VERSIONS + " => " + maxVersions +
+ ", " + COMPRESSION + " => '" + this.compressionType +
+ "', " + IN_MEMORY + " => " + inMemory +
+ ", " + BLOCKCACHE + " => " + blockCacheEnabled +
+ ", " + LENGTH + " => " + maxValueLength +
+ ", " + TTL + " => " +
+ (timeToLive == HConstants.FOREVER ? "FOREVER" :
+ Integer.toString(timeToLive)) +
+ ", " + BLOOMFILTER + " => " + bloomFilter + "}";
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public boolean equals(Object obj) {
+ return compareTo(obj) == 0;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public int hashCode() {
+ int result = Bytes.hashCode(this.name);
+ result ^= Integer.valueOf(this.maxVersions).hashCode();
+ result ^= this.compressionType.hashCode();
+ result ^= Boolean.valueOf(this.inMemory).hashCode();
+ result ^= Boolean.valueOf(this.blockCacheEnabled).hashCode();
+ result ^= Integer.valueOf(this.maxValueLength).hashCode();
+ result ^= Integer.valueOf(this.timeToLive).hashCode();
+ result ^= Boolean.valueOf(this.bloomFilter).hashCode();
+ result ^= Byte.valueOf(COLUMN_DESCRIPTOR_VERSION).hashCode();
+ return result;
+ }
+
+ // Writable
+
+ /** {@inheritDoc} */
+ public void readFields(DataInput in) throws IOException {
+ int versionNumber = in.readByte();
+ if (versionNumber <= 2) {
+ Text t = new Text();
+ t.readFields(in);
+ this.name = t.getBytes();
+ if (HStoreKey.getFamilyDelimiterIndex(this.name) > 0) {
+ this.name = stripColon(this.name);
+ }
+ } else {
+ this.name = Bytes.readByteArray(in);
+ }
+ this.maxVersions = in.readInt();
+ int ordinal = in.readInt();
+ this.compressionType = CompressionType.values()[ordinal];
+ this.inMemory = in.readBoolean();
+ this.maxValueLength = in.readInt();
+ this.bloomFilter = in.readBoolean();
+ if (this.bloomFilter && versionNumber < 5) {
+ // If a bloomFilter is enabled and the column descriptor is less than
+ // version 5, we need to skip over it to read the rest of the column
+ // descriptor. There are no BloomFilterDescriptors written to disk for
+ // column descriptors with a version number >= 5
+ BloomFilterDescriptor junk = new BloomFilterDescriptor();
+ junk.readFields(in);
+ }
+ if (versionNumber > 1) {
+ this.blockCacheEnabled = in.readBoolean();
+ }
+
+ if (versionNumber > 2) {
+ this.timeToLive = in.readInt();
+ }
+ }
+
+ /** {@inheritDoc} */
+ public void write(DataOutput out) throws IOException {
+ out.writeByte(COLUMN_DESCRIPTOR_VERSION);
+ Bytes.writeByteArray(out, this.name);
+ out.writeInt(this.maxVersions);
+ out.writeInt(this.compressionType.ordinal());
+ out.writeBoolean(this.inMemory);
+ out.writeInt(this.maxValueLength);
+ out.writeBoolean(this.bloomFilter);
+ out.writeBoolean(this.blockCacheEnabled);
+ out.writeInt(this.timeToLive);
+ }
+
+ // Comparable
+
+ /** {@inheritDoc} */
+ public int compareTo(Object o) {
+ HColumnDescriptor other = (HColumnDescriptor)o;
+ int result = Bytes.compareTo(this.name, other.getName());
+ if(result == 0) {
+ result = Integer.valueOf(this.maxVersions).compareTo(
+ Integer.valueOf(other.maxVersions));
+ }
+
+ if(result == 0) {
+ result = this.compressionType.compareTo(other.compressionType);
+ }
+
+ if(result == 0) {
+ if(this.inMemory == other.inMemory) {
+ result = 0;
+
+ } else if(this.inMemory) {
+ result = -1;
+
+ } else {
+ result = 1;
+ }
+ }
+
+ if(result == 0) {
+ if(this.blockCacheEnabled == other.blockCacheEnabled) {
+ result = 0;
+
+ } else if(this.blockCacheEnabled) {
+ result = -1;
+
+ } else {
+ result = 1;
+ }
+ }
+
+ if(result == 0) {
+ result = other.maxValueLength - this.maxValueLength;
+ }
+
+ if(result == 0) {
+ result = other.timeToLive - this.timeToLive;
+ }
+
+ if(result == 0) {
+ if(this.bloomFilter == other.bloomFilter) {
+ result = 0;
+
+ } else if(this.bloomFilter) {
+ result = -1;
+
+ } else {
+ result = 1;
+ }
+ }
+ return result;
+ }
+}
\ No newline at end of file
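
For orientation, a minimal usage sketch of the descriptor defined above. The
family name "contents:" and the variable names are invented for illustration;
per isLegalFamilyName, a family name passed to the full constructor must end
in a colon, which stripColon removes before storage:

    // Hedged sketch: construct a v5 HColumnDescriptor with explicit settings.
    // "contents:" is an example family name; the trailing colon is required
    // and is stripped before the name is stored.
    HColumnDescriptor family = new HColumnDescriptor(
        Bytes.toBytes("contents:"),             // family name, must end in ':'
        HColumnDescriptor.DEFAULT_VERSIONS,     // keep up to 3 versions
        HColumnDescriptor.CompressionType.NONE,
        false,                                  // inMemory
        false,                                  // blockCacheEnabled
        Integer.MAX_VALUE,                      // maxValueLength
        HConstants.FOREVER,                     // timeToLive (unlimited)
        false);                                 // bloomFilter
    // getName() returns the stored form, without the colon.
    byte [] stored = family.getName();          // Bytes.toBytes("contents")
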
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HConstants.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HConstants.java?rev=677517&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HConstants.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HConstants.java Thu Jul 17 00:17:26 2008
@@ -0,0 +1,228 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util.migration.v5;
+
+import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * HConstants holds a bunch of HBase-related constants
+ */
+public interface HConstants {
+
+ /** long constant for zero */
+ static final Long ZERO_L = Long.valueOf(0L);
+
+ static final String NINES = "99999999999999";
+ static final String ZEROES = "00000000000000";
+
+ // For migration
+
+ /** name of version file */
+ static final String VERSION_FILE_NAME = "hbase.version";
+
+ /**
+ * Current version of file system
+ * Version 4 supports only one kind of bloom filter
+ */
+ public static final String FILE_SYSTEM_VERSION = "4";
+
+ // Configuration parameters
+
+ // TODO: URL for hbase master like hdfs URLs with host and port.
+ // Like jdbc URLs? URLs could be used to refer to table cells?
+ // jdbc:mysql://[host][,failoverhost...][:port]/[database]
+ // jdbc:mysql://[host][,failoverhost...][:port]/[database][?propertyName1][=propertyValue1][&propertyName2][=propertyValue2]...
+
+ // Key into HBaseConfiguration for the hbase.master address.
+ // TODO: Support 'local': i.e. default of all running in single
+ // process. Same for regionserver. TODO: Is having HBase homed
+ // on port 60k OK?
+
+ /** Parameter name for master address */
+ static final String MASTER_ADDRESS = "hbase.master";
+
+ /** default host address */
+ static final String DEFAULT_HOST = "0.0.0.0";
+
+ /** default port that the master listens on */
+ static final int DEFAULT_MASTER_PORT = 60000;
+
+ /** Default master address */
+ static final String DEFAULT_MASTER_ADDRESS = DEFAULT_HOST + ":" +
+ DEFAULT_MASTER_PORT;
+
+ /** default port for master web api */
+ static final int DEFAULT_MASTER_INFOPORT = 60010;
+
+ /** Parameter name for hbase.regionserver address. */
+ static final String REGIONSERVER_ADDRESS = "hbase.regionserver";
+
+ /** Default region server address */
+ static final String DEFAULT_REGIONSERVER_ADDRESS = DEFAULT_HOST + ":60020";
+
+ /** default port for region server web api */
+ static final int DEFAULT_REGIONSERVER_INFOPORT = 60030;
+
+ /** Parameter name for what region server interface to use. */
+ static final String REGION_SERVER_CLASS = "hbase.regionserver.class";
+
+ /** Parameter name for what region server implementation to use. */
+ static final String REGION_SERVER_IMPL= "hbase.regionserver.impl";
+
+ /** Default region server interface class name. */
+ static final String DEFAULT_REGION_SERVER_CLASS = HRegionInterface.class.getName();
+
+ /** Parameter name for how often threads should wake up */
+ static final String THREAD_WAKE_FREQUENCY = "hbase.server.thread.wakefrequency";
+
+ /** Parameter name for HBase instance root directory */
+ static final String HBASE_DIR = "hbase.rootdir";
+
+ /** Used to construct the name of the log directory for a region server */
+ static final String HREGION_LOGDIR_NAME = "log";
+
+ /** Name of old log file for reconstruction */
+ static final String HREGION_OLDLOGFILE_NAME = "oldlogfile.log";
+
+ /** Default maximum file size */
+ static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
+
+ /** Default size of a reservation block */
+ static final int DEFAULT_SIZE_RESERVATION_BLOCK = 1024 * 1024 * 5;
+
+ // Always store the location of the root table's HRegion.
+ // This HRegion is never split.
+
+ // region name = table + startkey + regionid. This is the row key.
+ // each row in the root and meta tables describes exactly 1 region
+ // Do we ever need to know all the information that we are storing?
+
+ // Note that the name of the root table starts with "-" and the name of the
+ // meta table starts with "." Why? It's a trick. It turns out that when we
+ // store region names in memory, we use a SortedMap. Since "-" sorts before
+ // "." (and since no other table name can start with either of these
+ // characters), the root region will always be the first entry in such a Map,
+ // followed by all the meta regions (which will be ordered by their starting
+ // row key as well), followed by all user tables. So when the Master is
+ // choosing regions to assign, it will always choose the root region first,
+ // followed by the meta regions, followed by user regions. Since the root
+ // and meta regions always need to be on-line, this ensures that they will
+ // be the first to be reassigned if the server(s) they are being served by
+ // should go down.
+
+ /** The root table's name.*/
+ static final byte [] ROOT_TABLE_NAME = Bytes.toBytes("-ROOT-");
+
+ /** The META table's name. */
+ static final byte [] META_TABLE_NAME = Bytes.toBytes(".META.");
+
+ // Defines for the column names used in both ROOT and META HBase 'meta' tables.
+
+ /** The ROOT and META column family (string) */
+ static final String COLUMN_FAMILY_STR = "info:";
+
+ /** The META historian column family (string) */
+ static final String COLUMN_FAMILY_HISTORIAN_STR = "historian:";
+
+ /** The ROOT and META column family */
+ static final byte [] COLUMN_FAMILY = Bytes.toBytes(COLUMN_FAMILY_STR);
+
+ /** The META historian column family */
+ static final byte [] COLUMN_FAMILY_HISTORIAN = Bytes.toBytes(COLUMN_FAMILY_HISTORIAN_STR);
+
+ /** Array of meta column names */
+ static final byte[][] COLUMN_FAMILY_ARRAY = new byte[][] {COLUMN_FAMILY};
+
+ /** ROOT/META column family member - contains HRegionInfo */
+ static final byte [] COL_REGIONINFO =
+ Bytes.toBytes(COLUMN_FAMILY_STR + "regioninfo");
+
+ /** Array of column - contains HRegionInfo */
+ static final byte[][] COL_REGIONINFO_ARRAY = new byte[][] {COL_REGIONINFO};
+
+ /** ROOT/META column family member - contains HServerAddress.toString() */
+ static final byte[] COL_SERVER = Bytes.toBytes(COLUMN_FAMILY_STR + "server");
+
+ /** ROOT/META column family member - contains server start code (a long) */
+ static final byte [] COL_STARTCODE =
+ Bytes.toBytes(COLUMN_FAMILY_STR + "serverstartcode");
+
+ /** the lower half of a split region */
+ static final byte [] COL_SPLITA = Bytes.toBytes(COLUMN_FAMILY_STR + "splitA");
+
+ /** the upper half of a split region */
+ static final byte [] COL_SPLITB = Bytes.toBytes(COLUMN_FAMILY_STR + "splitB");
+
+ /** All the columns in the catalog -ROOT- and .META. tables.
+ */
+ static final byte[][] ALL_META_COLUMNS = {COL_REGIONINFO, COL_SERVER,
+ COL_STARTCODE, COL_SPLITA, COL_SPLITB};
+
+ // Other constants
+
+ /**
+ * An empty instance.
+ */
+ static final byte [] EMPTY_BYTE_ARRAY = new byte [0];
+
+ /**
+ * Used by scanners, etc when they want to start at the beginning of a region
+ */
+ static final byte [] EMPTY_START_ROW = EMPTY_BYTE_ARRAY;
+
+ /**
+ * Last row in a table.
+ */
+ static final byte [] EMPTY_END_ROW = EMPTY_START_ROW;
+
+ /**
+ * Used by scanners and others when they're trying to detect the end of a
+ * table
+ */
+ static final byte [] LAST_ROW = EMPTY_BYTE_ARRAY;
+
+ /** When we encode strings, we always specify UTF8 encoding */
+ static final String UTF8_ENCODING = "UTF-8";
+
+ /**
+ * Timestamp to use when we want to refer to the latest cell.
+ * This is the timestamp sent by clients when no timestamp is specified on
+ * commit.
+ */
+ static final long LATEST_TIMESTAMP = Long.MAX_VALUE;
+
+ /**
+ * Define for 'return-all-versions'.
+ */
+ static final int ALL_VERSIONS = Integer.MAX_VALUE;
+
+ /**
+ * Unlimited time-to-live.
+ */
+ static final int FOREVER = -1;
+
+ public static final String HBASE_CLIENT_RETRIES_NUMBER_KEY =
+ "hbase.client.retries.number";
+ public static final int DEFAULT_CLIENT_RETRIES = 5;
+
+ public static final String NAME = "NAME";
+ public static final String VERSIONS = "VERSIONS";
+}
\ No newline at end of file
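
The long comment above about root and meta region ordering boils down to byte
comparison: '-' (0x2D) sorts before '.' (0x2E), which sorts before letters and
digits. A minimal sketch checking that claim, with a made-up user region name:

    // Hedged sketch: the SortedMap ordering described in the comments above.
    SortedMap<byte [], String> regions =
        new TreeMap<byte [], String>(Bytes.BYTES_COMPARATOR);
    regions.put(Bytes.toBytes("mytable,,1216281448000"), "a user region");
    regions.put(HConstants.META_TABLE_NAME, ".META.");
    regions.put(HConstants.ROOT_TABLE_NAME, "-ROOT-");
    // Iterates as -ROOT-, .META., then user regions: the master assigns
    // the root and meta regions first.
    for (String name : regions.values()) {
      System.out.println(name);
    }
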
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLog.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLog.java?rev=677517&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLog.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLog.java Thu Jul 17 00:17:26 2008
@@ -0,0 +1,698 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util.migration.v5;
+
+import java.io.EOFException;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.RemoteExceptionHandler;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.SequenceFile.Reader;
+
+/**
+ * HLog stores all the edits to the HStore.
+ *
+ * It performs logfile-rolling, so external callers are not aware that the
+ * underlying file is being rolled.
+ *
+ * <p>
+ * A single HLog is used by several HRegions simultaneously.
+ *
+ * <p>
+ * Each HRegion is identified by a unique <code>long</code> integer. HRegions do
+ * not need to declare themselves before using the HLog; they simply include
+ * their HRegion-id in the <code>append</code> or
+ * <code>completeCacheFlush</code> calls.
+ *
+ * <p>
+ * An HLog consists of multiple on-disk files, which have a chronological order.
+ * As data is flushed to other (better) on-disk structures, the log becomes
+ * obsolete. We can destroy all the log messages for a given HRegion-id up to
+ * the most-recent CACHEFLUSH message from that HRegion.
+ *
+ * <p>
+ * It's only practical to delete entire files. Thus, we delete an entire on-disk
+ * file F when all of the messages in F have a log-sequence-id that's older
+ * (smaller) than the most-recent CACHEFLUSH message for every HRegion that has
+ * a message in F.
+ *
+ * <p>
+ * Synchronized methods can never execute in parallel. However, between the
+ * start of a cache flush and the completion point, appends are allowed but log
+ * rolling is not. To prevent log rolling taking place during this period, a
+ * separate reentrant lock is used.
+ *
+ * <p>
+ * TODO: Vuk Ercegovac also pointed out that keeping HBase HRegion edit logs in
+ * HDFS is currently flawed. HBase writes edits to logs and to a memcache. The
+ * 'atomic' write to the log is meant to serve as insurance against abnormal
+ * RegionServer exit: on startup, the log is rerun to reconstruct an HRegion's
+ * last wholesome state. But files in HDFS do not 'exist' until they are cleanly
+ * closed -- something that will not happen if RegionServer exits without
+ * running its 'close'.
+ */
+public class HLog implements HConstants {
+ private static final Log LOG = LogFactory.getLog(HLog.class);
+ private static final String HLOG_DATFILE = "hlog.dat.";
+ static final byte [] METACOLUMN = Bytes.toBytes("METACOLUMN:");
+ static final byte [] METAROW = Bytes.toBytes("METAROW");
+ final FileSystem fs;
+ final Path dir;
+ final Configuration conf;
+ final LogRollListener listener;
+ final long threadWakeFrequency;
+ private final int maxlogentries;
+
+ /*
+ * Current log file.
+ */
+ SequenceFile.Writer writer;
+
+ /*
+ * Map of all log files but the current one.
+ */
+ final SortedMap<Long, Path> outputfiles =
+ Collections.synchronizedSortedMap(new TreeMap<Long, Path>());
+
+ /*
+ * Map of region to last sequence/edit id.
+ */
+ private final Map<byte [], Long> lastSeqWritten = Collections.
+ synchronizedSortedMap(new TreeMap<byte [], Long>(Bytes.BYTES_COMPARATOR));
+
+ private volatile boolean closed = false;
+
+ private final Integer sequenceLock = new Integer(0);
+ private volatile long logSeqNum = 0;
+
+ private volatile long filenum = 0;
+ private volatile long old_filenum = -1;
+
+ private volatile int numEntries = 0;
+
+ // This lock prevents starting a log roll during a cache flush.
+ // synchronized is insufficient because a cache flush spans two method calls.
+ private final Lock cacheFlushLock = new ReentrantLock();
+
+ // We synchronize on updateLock to prevent updates and to prevent a log roll
+ // during an update
+ private final Integer updateLock = new Integer(0);
+
+ /**
+ * Create an edit log at the given <code>dir</code> location.
+ *
+ * You should never have to load an existing log. If there is a log at
+ * startup, it should have already been processed and deleted by the time the
+ * HLog object is started up.
+ *
+ * @param fs
+ * @param dir
+ * @param conf
+ * @param listener
+ * @throws IOException
+ */
+ public HLog(final FileSystem fs, final Path dir, final Configuration conf,
+ final LogRollListener listener) throws IOException {
+ this.fs = fs;
+ this.dir = dir;
+ this.conf = conf;
+ this.listener = listener;
+ this.threadWakeFrequency = conf.getLong(THREAD_WAKE_FREQUENCY, 10 * 1000);
+ this.maxlogentries =
+ conf.getInt("hbase.regionserver.maxlogentries", 30 * 1000);
+ if (fs.exists(dir)) {
+ throw new IOException("Target HLog directory already exists: " + dir);
+ }
+ fs.mkdirs(dir);
+ rollWriter();
+ }
+
+ /*
+ * Accessor for tests.
+ * @return Current state of the monotonically increasing file id.
+ */
+ long getFilenum() {
+ return this.filenum;
+ }
+
+ /**
+ * Get the compression type for the hlog files.
+ * @param c Configuration to use.
+ * @return the kind of compression to use
+ */
+ private static CompressionType getCompressionType(final Configuration c) {
+ String name = c.get("hbase.io.seqfile.compression.type");
+ return name == null? CompressionType.NONE: CompressionType.valueOf(name);
+ }
+
+ /**
+ * Called by HRegionServer when it opens a new region to ensure that log
+ * sequence numbers are always greater than the latest sequence number of the
+ * region being brought on-line.
+ *
+ * @param newvalue We'll set log edit/sequence number to this value if it
+ * is greater than the current value.
+ */
+ void setSequenceNumber(long newvalue) {
+ synchronized (sequenceLock) {
+ if (newvalue > logSeqNum) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("changing sequence number from " + logSeqNum + " to " +
+ newvalue);
+ }
+ logSeqNum = newvalue;
+ }
+ }
+ }
+
+ /**
+ * Roll the log writer. That is, start writing log messages to a new file.
+ *
+ * Because a log cannot be rolled during a cache flush, and a cache flush
+ * spans two method calls, a special lock needs to be obtained so that a cache
+ * flush cannot start when the log is being rolled and the log cannot be
+ * rolled during a cache flush.
+ *
+ * <p>Note that this method cannot be synchronized. If it were, the following
+ * deadlock could occur: startCacheFlush runs and obtains the cacheFlushLock;
+ * this method then starts, takes the lock on this object, and blocks trying
+ * to obtain the cacheFlushLock; completeCacheFlush is then called and waits
+ * forever for the lock on this object, so the cacheFlushLock is never released.
+ *
+ * @throws IOException
+ */
+ public void rollWriter() throws IOException {
+ this.cacheFlushLock.lock();
+ try {
+ if (closed) {
+ return;
+ }
+ synchronized (updateLock) {
+ if (this.writer != null) {
+ // Close the current writer, get a new one.
+ this.writer.close();
+ Path p = computeFilename(old_filenum);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Closing current log writer " + FSUtils.getPath(p));
+ }
+ if (filenum > 0) {
+ synchronized (this.sequenceLock) {
+ this.outputfiles.put(Long.valueOf(this.logSeqNum - 1), p);
+ }
+ }
+ }
+ old_filenum = filenum;
+ filenum = System.currentTimeMillis();
+ Path newPath = computeFilename(filenum);
+ this.writer = SequenceFile.createWriter(this.fs, this.conf, newPath,
+ HLogKey.class, HLogEdit.class, getCompressionType(this.conf));
+ LOG.info("New log writer created at " + FSUtils.getPath(newPath));
+
+ // Can we delete any of the old log files?
+ if (this.outputfiles.size() > 0) {
+ if (this.lastSeqWritten.size() <= 0) {
+ LOG.debug("Last sequence written is empty. Deleting all old hlogs");
+ // If so, then no new writes have come in since all regions were
+ // flushed (and removed from the lastSeqWritten map). Means can
+ // remove all but currently open log file.
+ for (Map.Entry<Long, Path> e : this.outputfiles.entrySet()) {
+ deleteLogFile(e.getValue(), e.getKey());
+ }
+ this.outputfiles.clear();
+ } else {
+ // Get oldest edit/sequence id. If logs are older than this id,
+ // then safe to remove.
+ Long oldestOutstandingSeqNum =
+ Collections.min(this.lastSeqWritten.values());
+ // Get the set of all log files whose final ID is older than or
+ // equal to the oldest pending region operation
+ TreeSet<Long> sequenceNumbers =
+ new TreeSet<Long>(this.outputfiles.headMap(
+ (Long.valueOf(oldestOutstandingSeqNum.longValue() + 1L))).keySet());
+ // Now remove old log files (if any)
+ if (LOG.isDebugEnabled()) {
+ // Find region associated with oldest key -- helps debugging.
+ byte [] oldestRegion = null;
+ for (Map.Entry<byte [], Long> e: this.lastSeqWritten.entrySet()) {
+ if (e.getValue().longValue() == oldestOutstandingSeqNum.longValue()) {
+ oldestRegion = e.getKey();
+ break;
+ }
+ }
+ if (LOG.isDebugEnabled() && sequenceNumbers.size() > 0) {
+ LOG.debug("Found " + sequenceNumbers.size() +
+ " logs to remove " +
+ "using oldest outstanding seqnum of " +
+ oldestOutstandingSeqNum + " from region " + oldestRegion);
+ }
+ }
+ if (sequenceNumbers.size() > 0) {
+ for (Long seq : sequenceNumbers) {
+ deleteLogFile(this.outputfiles.remove(seq), seq);
+ }
+ }
+ }
+ }
+ this.numEntries = 0;
+ }
+ } finally {
+ this.cacheFlushLock.unlock();
+ }
+ }
+
+ private void deleteLogFile(final Path p, final Long seqno) throws IOException {
+ LOG.info("removing old log file " + FSUtils.getPath(p) +
+ " whose highest sequence/edit id is " + seqno);
+ this.fs.delete(p, true);
+ }
+
+ /**
+ * This is a convenience method that computes a new filename with a given
+ * file-number.
+ */
+ Path computeFilename(final long fn) {
+ return new Path(dir, HLOG_DATFILE + fn);
+ }
+
+ /**
+ * Shut down the log and delete the log directory
+ *
+ * @throws IOException
+ */
+ public void closeAndDelete() throws IOException {
+ close();
+ fs.delete(dir, true);
+ }
+
+ /**
+ * Shut down the log.
+ *
+ * @throws IOException
+ */
+ void close() throws IOException {
+ cacheFlushLock.lock();
+ try {
+ synchronized (updateLock) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("closing log writer in " + this.dir.toString());
+ }
+ this.writer.close();
+ this.closed = true;
+ }
+ } finally {
+ cacheFlushLock.unlock();
+ }
+ }
+
+ /**
+ * Append a set of edits to the log. Log edits are keyed by regionName,
+ * rowname, and log-sequence-id.
+ *
+ * Later, if we sort by these keys, we obtain all the relevant edits for a
+ * given key-range of the HRegion (TODO). Any edits that do not have a
+ * matching {@link HConstants#COMPLETE_CACHEFLUSH} message can be discarded.
+ *
+ * <p>
+ * Logs cannot be restarted once closed, or once the HLog process dies. Each
+ * time the HLog starts, it must create a new log. This means that other
+ * systems should process the log appropriately upon each startup (and prior
+ * to initializing HLog).
+ *
+ * synchronized prevents appends during the completion of a cache flush or for
+ * the duration of a log roll.
+ *
+ * @param regionName
+ * @param tableName
+ * @param edits
+ * @throws IOException
+ */
+ void append(byte [] regionName, byte [] tableName,
+ TreeMap<HStoreKey, byte[]> edits)
+ throws IOException {
+ if (closed) {
+ throw new IOException("Cannot append; log is closed");
+ }
+ synchronized (updateLock) {
+ long seqNum[] = obtainSeqNum(edits.size());
+ // The 'lastSeqWritten' map holds the sequence number of the oldest
+ // write for each region. When the cache is flushed, the entry for the
+ // region being flushed is removed if the sequence number of the flush
+ // is greater than or equal to the value in lastSeqWritten.
+ if (!this.lastSeqWritten.containsKey(regionName)) {
+ this.lastSeqWritten.put(regionName, Long.valueOf(seqNum[0]));
+ }
+ int counter = 0;
+ for (Map.Entry<HStoreKey, byte[]> es : edits.entrySet()) {
+ HStoreKey key = es.getKey();
+ HLogKey logKey =
+ new HLogKey(regionName, tableName, key.getRow(), seqNum[counter++]);
+ HLogEdit logEdit =
+ new HLogEdit(key.getColumn(), es.getValue(), key.getTimestamp());
+ try {
+ this.writer.append(logKey, logEdit);
+ } catch (IOException e) {
+ LOG.fatal("Could not append. Requesting close of log", e);
+ requestLogRoll();
+ throw e;
+ }
+ this.numEntries++;
+ }
+ }
+ if (this.numEntries > this.maxlogentries) {
+ requestLogRoll();
+ }
+ }
+
+ private void requestLogRoll() {
+ if (this.listener != null) {
+ this.listener.logRollRequested();
+ }
+ }
+
+ /** @return How many items have been added to the log */
+ int getNumEntries() {
+ return numEntries;
+ }
+
+ /**
+ * Obtain a log sequence number.
+ */
+ private long obtainSeqNum() {
+ long value;
+ synchronized (sequenceLock) {
+ value = logSeqNum++;
+ }
+ return value;
+ }
+
+ /** @return the number of log files in use */
+ int getNumLogFiles() {
+ return outputfiles.size();
+ }
+
+ /**
+ * Obtain a specified number of sequence numbers
+ *
+ * @param num number of sequence numbers to obtain
+ * @return array of sequence numbers
+ */
+ private long[] obtainSeqNum(int num) {
+ long[] results = new long[num];
+ synchronized (this.sequenceLock) {
+ for (int i = 0; i < num; i++) {
+ results[i] = this.logSeqNum++;
+ }
+ }
+ return results;
+ }
+
+ /**
+ * By acquiring a log sequence ID, we can allow log messages to continue while
+ * we flush the cache.
+ *
+ * Acquire a lock so that we do not roll the log between the start and
+ * completion of a cache-flush. Otherwise the log-seq-id for the flush will
+ * not appear in the correct logfile.
+ *
+ * @return sequence ID to pass {@link #completeCacheFlush(byte[], byte[], long)}
+ * @see #completeCacheFlush(byte[], byte[], long)
+ * @see #abortCacheFlush()
+ */
+ long startCacheFlush() {
+ this.cacheFlushLock.lock();
+ return obtainSeqNum();
+ }
+
+ /**
+ * Complete the cache flush
+ *
+ * Protected by cacheFlushLock
+ *
+ * @param regionName
+ * @param tableName
+ * @param logSeqId
+ * @throws IOException
+ */
+ void completeCacheFlush(final byte [] regionName, final byte [] tableName,
+ final long logSeqId) throws IOException {
+
+ try {
+ if (this.closed) {
+ return;
+ }
+ synchronized (updateLock) {
+ this.writer.append(new HLogKey(regionName, tableName, HLog.METAROW, logSeqId),
+ new HLogEdit(HLog.METACOLUMN, HLogEdit.completeCacheFlush.get(),
+ System.currentTimeMillis()));
+ this.numEntries++;
+ Long seq = this.lastSeqWritten.get(regionName);
+ if (seq != null && logSeqId >= seq.longValue()) {
+ this.lastSeqWritten.remove(regionName);
+ }
+ }
+ } finally {
+ this.cacheFlushLock.unlock();
+ }
+ }
+
+ /**
+ * Abort a cache flush.
+ * Call if the flush fails. Note that the only recovery for an aborted flush
+ * currently is a restart of the regionserver so the snapshot content dropped
+ * by the failure gets restored to the memcache.
+ */
+ void abortCacheFlush() {
+ this.cacheFlushLock.unlock();
+ }
+
+ /**
+ * Split up a bunch of log files that are no longer being written to, into
+ * new files, one per region. Delete the old log files when finished.
+ *
+ * @param rootDir qualified root directory of the HBase instance
+ * @param srcDir Directory of log files to split: e.g.
+ * <code>${ROOTDIR}/log_HOST_PORT</code>
+ * @param fs FileSystem
+ * @param conf HBaseConfiguration
+ * @throws IOException
+ */
+ public static void splitLog(Path rootDir, Path srcDir, FileSystem fs,
+ Configuration conf) throws IOException {
+ if (!fs.exists(srcDir)) {
+ // Nothing to do
+ return;
+ }
+ FileStatus logfiles[] = fs.listStatus(srcDir);
+ if (logfiles == null || logfiles.length == 0) {
+ // Nothing to do
+ return;
+ }
+ LOG.info("splitting " + logfiles.length + " log(s) in " +
+ srcDir.toString());
+ Map<byte [], SequenceFile.Writer> logWriters =
+ new TreeMap<byte [], SequenceFile.Writer>(Bytes.BYTES_COMPARATOR);
+ try {
+ for (int i = 0; i < logfiles.length; i++) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Splitting " + i + " of " + logfiles.length + ": " +
+ logfiles[i].getPath());
+ }
+ // Check for empty file.
+ if (logfiles[i].getLen() <= 0) {
+ LOG.info("Skipping " + logfiles[i].toString() +
+ " because zero length");
+ continue;
+ }
+ HLogKey key = new HLogKey();
+ HLogEdit val = new HLogEdit();
+ SequenceFile.Reader in =
+ new SequenceFile.Reader(fs, logfiles[i].getPath(), conf);
+ try {
+ int count = 0;
+ for (; in.next(key, val); count++) {
+ byte [] tableName = key.getTablename();
+ byte [] regionName = key.getRegionName();
+ SequenceFile.Writer w = logWriters.get(regionName);
+ if (w == null) {
+ Path logfile = new Path(
+ HRegion.getRegionDir(
+ HTableDescriptor.getTableDir(rootDir, tableName),
+ HRegionInfo.encodeRegionName(regionName)),
+ HREGION_OLDLOGFILE_NAME);
+ Path oldlogfile = null;
+ SequenceFile.Reader old = null;
+ if (fs.exists(logfile)) {
+ LOG.warn("Old log file " + logfile +
+ " already exists. Copying existing file to new file");
+ oldlogfile = new Path(logfile.toString() + ".old");
+ fs.rename(logfile, oldlogfile);
+ old = new SequenceFile.Reader(fs, oldlogfile, conf);
+ }
+ w = SequenceFile.createWriter(fs, conf, logfile, HLogKey.class,
+ HLogEdit.class, getCompressionType(conf));
+ // Use a copy of regionName; the HLogKey instance is reused by the
+ // reader, so the regionName array contents change as we iterate.
+ logWriters.put(regionName, w);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Creating new log file writer for path " + logfile +
+ " and region " + regionName);
+ }
+
+ if (old != null) {
+ // Copy from existing log file
+ HLogKey oldkey = new HLogKey();
+ HLogEdit oldval = new HLogEdit();
+ for (; old.next(oldkey, oldval); count++) {
+ if (LOG.isDebugEnabled() && count > 0 && count % 10000 == 0) {
+ LOG.debug("Copied " + count + " edits");
+ }
+ w.append(oldkey, oldval);
+ }
+ old.close();
+ fs.delete(oldlogfile, true);
+ }
+ }
+ w.append(key, val);
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Applied " + count + " total edits from " +
+ logfiles[i].getPath().toString());
+ }
+ } catch (IOException e) {
+ e = RemoteExceptionHandler.checkIOException(e);
+ if (!(e instanceof EOFException)) {
+ LOG.warn("Exception processing " + logfiles[i].getPath() +
+ " -- continuing. Possible DATA LOSS!", e);
+ }
+ } finally {
+ try {
+ in.close();
+ } catch (IOException e) {
+ LOG.warn("Close in finally threw exception -- continuing", e);
+ }
+ // Delete the input file now so we do not replay its edits. We could
+ // have gotten here because of an exception. If so, there is probably
+ // nothing we can do about it: replaying might work, but we could
+ // also be stuck replaying forever. Just continue, though we may
+ // have lost some edits.
+ fs.delete(logfiles[i].getPath(), true);
+ }
+ }
+ } finally {
+ for (SequenceFile.Writer w : logWriters.values()) {
+ w.close();
+ }
+ }
+
+ try {
+ fs.delete(srcDir, true);
+ } catch (IOException e) {
+ e = RemoteExceptionHandler.checkIOException(e);
+ IOException io = new IOException("Cannot delete: " + srcDir);
+ io.initCause(e);
+ throw io;
+ }
+ LOG.info("log file splitting completed for " + srcDir.toString());
+ }
+
+ private static void usage() {
+ System.err.println("Usage: java org.apache.hbase.HLog" +
+ " {--dump <logfile>... | --split <logdir>...}");
+ }
+
+ /**
+ * Pass one or more log file names and it will either dump out a text version
+ * on <code>stdout</code> or split the specified log files.
+ *
+ * @param args
+ * @throws IOException
+ */
+ public static void main(String[] args) throws IOException {
+ if (args.length < 2) {
+ usage();
+ System.exit(-1);
+ }
+ boolean dump = true;
+ if (args[0].compareTo("--dump") != 0) {
+ if (args[0].compareTo("--split") == 0) {
+ dump = false;
+
+ } else {
+ usage();
+ System.exit(-1);
+ }
+ }
+ Configuration conf = new HBaseConfiguration();
+ FileSystem fs = FileSystem.get(conf);
+ Path baseDir = new Path(conf.get(HBASE_DIR));
+
+ for (int i = 1; i < args.length; i++) {
+ Path logPath = new Path(args[i]);
+ if (!fs.exists(logPath)) {
+ throw new FileNotFoundException(args[i] + " does not exist");
+ }
+ if (dump) {
+ if (!fs.isFile(logPath)) {
+ throw new IOException(args[i] + " is not a file");
+ }
+ Reader log = new SequenceFile.Reader(fs, logPath, conf);
+ try {
+ HLogKey key = new HLogKey();
+ HLogEdit val = new HLogEdit();
+ while (log.next(key, val)) {
+ System.out.println(key.toString() + " " + val.toString());
+ }
+ } finally {
+ log.close();
+ }
+ } else {
+ if (!fs.getFileStatus(logPath).isDir()) {
+ throw new IOException(args[i] + " is not a directory");
+ }
+ splitLog(baseDir, logPath, fs, conf);
+ }
+ }
+ }
+}
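
To make the append path above concrete, a hedged sketch of a caller in the
same package; the directory, region, table, and cell values are all invented,
and the null listener simply means no log rolls will be requested:

    // Hedged sketch: create a log, append a batch of edits, and shut down.
    // Assumes this file's package and imports; throws IOException.
    Configuration conf = new HBaseConfiguration();
    FileSystem fs = FileSystem.get(conf);
    HLog log = new HLog(fs, new Path("/hbase/log_example"), conf, null);

    TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>();
    edits.put(new HStoreKey(Bytes.toBytes("row1"), Bytes.toBytes("info:a"),
        System.currentTimeMillis()), Bytes.toBytes("value1"));
    log.append(Bytes.toBytes("regionname"), Bytes.toBytes("tablename"), edits);

    log.rollWriter();      // start a new hlog.dat.<filenum> file
    log.closeAndDelete();  // close the writer and remove the log directory
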
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLogEdit.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLogEdit.java?rev=677517&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLogEdit.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLogEdit.java Thu Jul 17 00:17:26 2008
@@ -0,0 +1,141 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util.migration.v5;
+
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.*;
+
+import java.io.*;
+
+import org.apache.hadoop.hbase.HConstants;
+
+/**
+ * A log value.
+ *
+ * These aren't sortable; you need to sort by the matching HLogKey.
+ * The table and row are already identified in HLogKey.
+ * This just indicates the column and value.
+ */
+public class HLogEdit implements Writable, HConstants {
+
+ /** Value stored for a deleted item */
+ public static ImmutableBytesWritable deleteBytes = null;
+
+ /** Value written to HLog on a complete cache flush */
+ public static ImmutableBytesWritable completeCacheFlush = null;
+
+ static {
+ try {
+ deleteBytes =
+ new ImmutableBytesWritable("HBASE::DELETEVAL".getBytes(UTF8_ENCODING));
+
+ completeCacheFlush =
+ new ImmutableBytesWritable("HBASE::CACHEFLUSH".getBytes(UTF8_ENCODING));
+
+ } catch (UnsupportedEncodingException e) {
+ assert(false);
+ }
+ }
+
+ /**
+ * @param value
+ * @return True if the entry's content is {@link #deleteBytes}.
+ */
+ public static boolean isDeleted(final byte [] value) {
+ return (value == null)? false: deleteBytes.compareTo(value) == 0;
+ }
+
+ private byte [] column;
+ private byte [] val;
+ private long timestamp;
+ private static final int MAX_VALUE_LEN = 128;
+
+ /**
+ * Default constructor used by Writable
+ */
+ public HLogEdit() {
+ super();
+ }
+
+ /**
+ * Construct a fully initialized HLogEdit
+ * @param c column name
+ * @param bval value
+ * @param timestamp timestamp for modification
+ */
+ public HLogEdit(byte [] c, byte [] bval, long timestamp) {
+ this.column = c;
+ this.val = bval;
+ this.timestamp = timestamp;
+ }
+
+ /** @return the column */
+ public byte [] getColumn() {
+ return this.column;
+ }
+
+ /** @return the value */
+ public byte [] getVal() {
+ return this.val;
+ }
+
+ /** @return the timestamp */
+ public long getTimestamp() {
+ return this.timestamp;
+ }
+
+ /**
+ * @return Column name, timestamp, and the first 128 bytes of the value,
+ * rendered as a String.
+ */
+ @Override
+ public String toString() {
+ String value = "";
+ try {
+ value = (this.val.length > MAX_VALUE_LEN)?
+ new String(this.val, 0, MAX_VALUE_LEN, HConstants.UTF8_ENCODING) +
+ "...":
+ new String(getVal(), HConstants.UTF8_ENCODING);
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("UTF8 encoding not present?", e);
+ }
+ return "(" + Bytes.toString(getColumn()) + "/" + getTimestamp() + "/" +
+ value + ")";
+ }
+
+ // Writable
+
+ /** {@inheritDoc} */
+ public void write(DataOutput out) throws IOException {
+ Bytes.writeByteArray(out, this.column);
+ out.writeInt(this.val.length);
+ out.write(this.val);
+ out.writeLong(timestamp);
+ }
+
+ /** {@inheritDoc} */
+ public void readFields(DataInput in) throws IOException {
+ this.column = Bytes.readByteArray(in);
+ this.val = new byte[in.readInt()];
+ in.readFully(this.val);
+ this.timestamp = in.readLong();
+ }
+}
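
HLogEdit is a plain Writable, so it round-trips through any DataOutput and
DataInput pair; a minimal sketch using in-memory streams from java.io (the
column, value, and timestamp here are invented):

    // Hedged sketch: serialize an HLogEdit and read it back.
    HLogEdit edit = new HLogEdit(Bytes.toBytes("info:a"),
        Bytes.toBytes("value1"), System.currentTimeMillis());

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    edit.write(new DataOutputStream(buf));

    HLogEdit copy = new HLogEdit();
    copy.readFields(new DataInputStream(
        new ByteArrayInputStream(buf.toByteArray())));
    // copy now carries the same column, value, and timestamp as edit.
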
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLogKey.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLogKey.java?rev=677517&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLogKey.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/migration/v5/HLogKey.java Thu Jul 17 00:17:26 2008
@@ -0,0 +1,161 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util.migration.v5;
+
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.*;
+
+import java.io.*;
+
+/**
+ * A Key for an entry in the change log.
+ *
+ * The log intermingles edits to many tables and rows, so each log entry
+ * identifies the appropriate table and row. Within a table and row, they're
+ * also sorted.
+ */
+public class HLogKey implements WritableComparable {
+ private byte [] regionName;
+ private byte [] tablename;
+ private byte [] row;
+ private long logSeqNum;
+
+ /** Create an empty key useful when deserializing */
+ public HLogKey() {
+ this(null, null, null, 0L);
+ }
+
+ /**
+ * Create the log key!
+ * We maintain the tablename mainly for debugging purposes.
+ * A regionName is always a sub-table object.
+ *
+ * @param regionName - name of region
+ * @param tablename - name of table
+ * @param row - row key
+ * @param logSeqNum - log sequence number
+ */
+ public HLogKey(final byte [] regionName, final byte [] tablename,
+ final byte [] row, long logSeqNum) {
+ this.regionName = regionName;
+ this.tablename = tablename;
+ this.row = row;
+ this.logSeqNum = logSeqNum;
+ }
+
+ //////////////////////////////////////////////////////////////////////////////
+ // A bunch of accessors
+ //////////////////////////////////////////////////////////////////////////////
+
+ byte [] getRegionName() {
+ return regionName;
+ }
+
+ byte [] getTablename() {
+ return tablename;
+ }
+
+ byte [] getRow() {
+ return row;
+ }
+
+ long getLogSeqNum() {
+ return logSeqNum;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public String toString() {
+ return Bytes.toString(tablename) + "/" + Bytes.toString(regionName) + "/" +
+ Bytes.toString(row) + "/" + logSeqNum;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public boolean equals(Object obj) {
+ return compareTo(obj) == 0;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int hashCode() {
+ int result = this.regionName.hashCode();
+ result ^= this.row.hashCode();
+ result ^= this.logSeqNum;
+ return result;
+ }
+
+ //
+ // Comparable
+ //
+
+ /**
+ * {@inheritDoc}
+ */
+ public int compareTo(Object o) {
+ HLogKey other = (HLogKey) o;
+ int result = Bytes.compareTo(this.regionName, other.regionName);
+
+ if(result == 0) {
+ result = Bytes.compareTo(this.row, other.row);
+
+ if(result == 0) {
+
+ if (this.logSeqNum < other.logSeqNum) {
+ result = -1;
+
+ } else if (this.logSeqNum > other.logSeqNum) {
+ result = 1;
+ }
+ }
+ }
+ return result;
+ }
+
+ //
+ // Writable
+ //
+
+ /**
+ * {@inheritDoc}
+ */
+ public void write(DataOutput out) throws IOException {
+ Bytes.writeByteArray(out, this.regionName);
+ Bytes.writeByteArray(out, this.tablename);
+ Bytes.writeByteArray(out, this.row);
+ out.writeLong(logSeqNum);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public void readFields(DataInput in) throws IOException {
+ this.regionName = Bytes.readByteArray(in);
+ this.tablename = Bytes.readByteArray(in);
+ this.row = Bytes.readByteArray(in);
+ this.logSeqNum = in.readLong();
+ }
+}
\ No newline at end of file
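
The compareTo above orders log entries by region, then row, then sequence
number (the tablename is carried for debugging but never compared); a small
sketch with invented names:

    // Hedged sketch: two keys in the same region and row sort by logSeqNum.
    HLogKey first = new HLogKey(Bytes.toBytes("regionname"),
        Bytes.toBytes("tablename"), Bytes.toBytes("row1"), 1L);
    HLogKey second = new HLogKey(Bytes.toBytes("regionname"),
        Bytes.toBytes("tablename"), Bytes.toBytes("row1"), 2L);
    assert first.compareTo(second) < 0;   // earlier edit sorts first
    assert first.compareTo(first) == 0;   // equal on all three compared fields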