Posted to commits@hbase.apache.org by jm...@apache.org on 2015/02/22 21:55:49 UTC
[16/50] [abbrv] hbase git commit: HBASE-11567 Write bulk load COMMIT
events to WAL (Alex Newman, Jeffrey Zhong)
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f442773/hbase-protocol/src/main/protobuf/WAL.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol/src/main/protobuf/WAL.proto b/hbase-protocol/src/main/protobuf/WAL.proto
index f8a1534..169a9b2 100644
--- a/hbase-protocol/src/main/protobuf/WAL.proto
+++ b/hbase-protocol/src/main/protobuf/WAL.proto
@@ -22,6 +22,7 @@ option java_generate_equals_and_hash = true;
option optimize_for = SPEED;
import "HBase.proto";
+import "Client.proto";
message WALHeader {
optional bool has_compression = 1;
@@ -123,6 +124,22 @@ message FlushDescriptor {
repeated StoreFlushDescriptor store_flushes = 5;
}
+message StoreDescriptor {
+ required bytes family_name = 1;
+ required string store_home_dir = 2; //relative to region dir
+ repeated string store_file = 3; // relative to store dir
+}
+
+/**
+ * Special WAL entry used for writing bulk load events to WAL
+ */
+message BulkLoadDescriptor {
+ required TableName table_name = 1;
+ required bytes encoded_region_name = 2;
+ repeated StoreDescriptor stores = 3;
+ required int64 bulkload_seq_num = 4;
+}
+
/**
* Special WAL entry to hold all related to a region event (open/close).
*/
@@ -132,12 +149,6 @@ message RegionEventDescriptor {
REGION_CLOSE = 1;
}
- message StoreDescriptor {
- required bytes family_name = 1;
- required string store_home_dir = 2; //relative to region dir
- repeated string store_file = 3; // relative to store dir
- }
-
required EventType event_type = 1;
required bytes table_name = 2;
required bytes encoded_region_name = 3;
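Not part of the patch, but for orientation: a sketch of building the new BulkLoadDescriptor by hand with the Java classes protoc generates from the fields above. The builder names (setTableName, setEncodedRegionName, addStores, setBulkloadSeqNum) follow protoc's standard Java codegen for these field definitions; the table, file and sequence-number values are made up. The patch itself assembles this message through a ProtobufUtil.toBulkLoadDescriptor helper, which is assumed to do essentially the same thing.

    import com.google.protobuf.ByteString;
    import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
    import org.apache.hadoop.hbase.protobuf.generated.WALProtos;

    public class BulkLoadDescriptorSketch {
      public static void main(String[] args) {
        // One StoreDescriptor per column family touched by the load.
        WALProtos.StoreDescriptor store = WALProtos.StoreDescriptor.newBuilder()
            .setFamilyName(ByteString.copyFromUtf8("family1"))
            .setStoreHomeDir("family1")      // relative to the region dir
            .addStoreFile("hfile-00042")     // relative to the store dir
            .build();
        WALProtos.BulkLoadDescriptor desc = WALProtos.BulkLoadDescriptor.newBuilder()
            .setTableName(HBaseProtos.TableName.newBuilder()
                .setNamespace(ByteString.copyFromUtf8("default"))
                .setQualifier(ByteString.copyFromUtf8("mytable")))
            .setEncodedRegionName(ByteString.copyFromUtf8("fakeEncodedRegionName"))
            .addStores(store)
            .setBulkloadSeqNum(42L)          // flush seqId assigned to the load
            .build();
        System.out.println(desc);            // text-format dump of the marker payload
      }
    }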
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f442773/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 3312df7..53e732a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -61,6 +61,7 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
+import com.google.protobuf.ByteString;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -128,6 +129,7 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoRespo
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.FlushAction;
@@ -141,6 +143,7 @@ import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
+import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.ClassSize;
@@ -221,7 +224,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
* This is the global default value for durability. All tables/mutations not
* defining a durability or using USE_DEFAULT will default to this value.
*/
- private static final Durability DEFAULT_DURABLITY = Durability.SYNC_WAL;
+ private static final Durability DEFAULT_DURABILITY = Durability.SYNC_WAL;
final AtomicBoolean closed = new AtomicBoolean(false);
/* Closing can take some time; use the closing flag if there is stuff we don't
@@ -664,7 +667,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
this.rowProcessorTimeout = conf.getLong(
"hbase.hregion.row.processor.timeout", DEFAULT_ROW_PROCESSOR_TIMEOUT);
this.durability = htd.getDurability() == Durability.USE_DEFAULT
- ? DEFAULT_DURABLITY
+ ? DEFAULT_DURABILITY
: htd.getDurability();
if (rsServices != null) {
this.rsAccounting = this.rsServices.getRegionServerAccounting();
@@ -753,7 +756,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
}
private long initializeRegionInternals(final CancelableProgressable reporter,
- final MonitoredTask status) throws IOException, UnsupportedEncodingException {
+ final MonitoredTask status) throws IOException {
if (coprocessorHost != null) {
status.setStatus("Running coprocessor pre-open hook");
coprocessorHost.preOpen();
@@ -830,7 +833,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
}
private long initializeRegionStores(final CancelableProgressable reporter, MonitoredTask status)
- throws IOException, UnsupportedEncodingException {
+ throws IOException {
// Load in all the HStores.
long maxSeqId = -1;
@@ -2001,8 +2004,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
long oldestUnflushedSeqId = wal.getEarliestMemstoreSeqNum(encodedRegionName);
// no oldestUnflushedSeqId means we flushed all stores.
// or the unflushed stores are all empty.
- flushedSeqId =
- oldestUnflushedSeqId == HConstants.NO_SEQNUM ? flushOpSeqId : oldestUnflushedSeqId - 1;
+ flushedSeqId = (oldestUnflushedSeqId == HConstants.NO_SEQNUM) ? flushOpSeqId
+ : oldestUnflushedSeqId - 1;
} else {
// use the provided sequence Id as WAL is not being used for this flush.
flushedSeqId = flushOpSeqId = myseqid;
@@ -2265,7 +2268,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
return getScanner(scan, null);
}
- void prepareScanner(Scan scan) throws IOException {
+ void prepareScanner(Scan scan) {
if(!scan.hasFamilies()) {
// Adding all families to scanner
for(byte[] family: this.htableDescriptor.getFamiliesKeys()){
@@ -3241,7 +3244,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
closeRegionOperation();
}
}
- private void doBatchMutate(Mutation mutation) throws IOException, DoNotRetryIOException {
+ private void doBatchMutate(Mutation mutation) throws IOException {
// Currently this is only called for puts and deletes, so no nonces.
OperationStatus[] batchMutate = this.batchMutate(new Mutation[] { mutation },
HConstants.NO_NONCE, HConstants.NO_NONCE);
@@ -3595,7 +3598,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
protected long replayRecoveredEditsIfAny(final Path regiondir,
Map<byte[], Long> maxSeqIdInStores,
final CancelableProgressable reporter, final MonitoredTask status)
- throws UnsupportedEncodingException, IOException {
+ throws IOException {
long minSeqIdForTheRegion = -1;
for (Long maxSeqIdInStore : maxSeqIdInStores.values()) {
if (maxSeqIdInStore < minSeqIdForTheRegion || minSeqIdForTheRegion == -1) {
@@ -4101,7 +4104,17 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
return multipleFamilies;
}
-
+ /**
+ * Bulk load one or more HFiles into this region.
+ *
+ * @param familyPaths A list which maps each column family to the location of the HFile to load
+ * into that column family.
+ * @param assignSeqId If true, force a flush and use its sequenceId to preserve the guarantee
+ * that all the edits lower than the highest sequential ID from all the HFiles
+ * are flushed on disk.
+ * @return true if successful, false if failed recoverably
+ * @throws IOException if failed unrecoverably.
+ */
public boolean bulkLoadHFiles(List<Pair<byte[], String>> familyPaths,
boolean assignSeqId) throws IOException {
return bulkLoadHFiles(familyPaths, assignSeqId, null);
@@ -4111,14 +4124,19 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
* Attempts to atomically load a group of hfiles. This is critical for loading
* rows with multiple column families atomically.
*
- * @param familyPaths List of Pair<byte[] column family, String hfilePath>
+ * @param familyPaths List of Pair<byte[] column family, String hfilePath>
* @param bulkLoadListener Internal hooks enabling massaging/preparation of a
- * file about to be bulk loaded
+ * file about to be bulk loaded
+ * @param assignSeqId If true, force a flush and use its sequenceId to preserve the guarantee that
+ * all the edits lower than the highest sequential ID from all the
+ * HFiles are flushed on disk.
* @return true if successful, false if failed recoverably
* @throws IOException if failed unrecoverably.
*/
public boolean bulkLoadHFiles(List<Pair<byte[], String>> familyPaths, boolean assignSeqId,
BulkLoadListener bulkLoadListener) throws IOException {
+ long seqId = -1;
+ Map<byte[], List<Path>> storeFiles = new TreeMap<byte[], List<Path>>(Bytes.BYTES_COMPARATOR);
Preconditions.checkNotNull(familyPaths);
// we need writeLock for multi-family bulk load
startBulkRegionOperation(hasMultipleColumnFamilies(familyPaths));
@@ -4164,7 +4182,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
StringBuilder list = new StringBuilder();
for (Pair<byte[], String> p : failures) {
list.append("\n").append(Bytes.toString(p.getFirst())).append(" : ")
- .append(p.getSecond());
+ .append(p.getSecond());
}
// problem when validating
LOG.warn("There was a recoverable bulk load failure likely due to a" +
@@ -4172,7 +4190,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
return false;
}
- long seqId = -1;
// We need to assign a sequential ID that's in between two memstores in order to preserve
// the guarantee that all the edits lower than the highest sequential ID from all the
// HFiles are flushed on disk. See HBASE-10958. The sequence id returned when we flush is
@@ -4196,11 +4213,19 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
Store store = getStore(familyName);
try {
String finalPath = path;
- if(bulkLoadListener != null) {
+ if (bulkLoadListener != null) {
finalPath = bulkLoadListener.prepareBulkLoad(familyName, path);
}
store.bulkLoadHFile(finalPath, seqId);
- if(bulkLoadListener != null) {
+
+ if (storeFiles.containsKey(familyName)) {
+ storeFiles.get(familyName).add(new Path(finalPath));
+ } else {
+ List<Path> storeFileNames = new ArrayList<Path>();
+ storeFileNames.add(new Path(finalPath));
+ storeFiles.put(familyName, storeFileNames);
+ }
+ if (bulkLoadListener != null) {
bulkLoadListener.doneBulkLoad(familyName, path);
}
} catch (IOException ioe) {
@@ -4209,20 +4234,38 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
// TODO Need a better story for reverting partial failures due to HDFS.
LOG.error("There was a partial failure due to IO when attempting to" +
- " load " + Bytes.toString(p.getFirst()) + " : "+ p.getSecond(), ioe);
- if(bulkLoadListener != null) {
+ " load " + Bytes.toString(p.getFirst()) + " : " + p.getSecond(), ioe);
+ if (bulkLoadListener != null) {
try {
bulkLoadListener.failedBulkLoad(familyName, path);
} catch (Exception ex) {
- LOG.error("Error while calling failedBulkLoad for family "+
- Bytes.toString(familyName)+" with path "+path, ex);
+ LOG.error("Error while calling failedBulkLoad for family " +
+ Bytes.toString(familyName) + " with path " + path, ex);
}
}
throw ioe;
}
}
+
return true;
} finally {
+ if (wal != null && !storeFiles.isEmpty()) {
+ // write a bulk load event marker for whatever hfiles were loaded, even on partial failure
+ try {
+ WALProtos.BulkLoadDescriptor loadDescriptor = ProtobufUtil.toBulkLoadDescriptor(
+ this.getRegionInfo().getTable(),
+ ByteStringer.wrap(this.getRegionInfo().getEncodedNameAsBytes()), storeFiles, seqId);
+ WALUtil.writeBulkLoadMarkerAndSync(wal, this.htableDescriptor, getRegionInfo(),
+ loadDescriptor, sequenceId);
+ } catch (IOException ioe) {
+ if (this.rsServices != null) {
+ // Have to abort region server because some hfiles have been loaded but we can't write
+ // the event into WAL
+ this.rsServices.abort("Failed to write bulk load event into WAL.", ioe);
+ }
+ }
+ }
+
closeBulkRegionOperation();
}
}
@@ -5340,8 +5383,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
doProcessRowWithTimeout(
processor, now, this, null, null, timeout);
processor.postProcess(this, walEdit, true);
- } catch (IOException e) {
- throw e;
} finally {
closeRegionOperation();
}
@@ -5460,8 +5501,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
// 14. Run post-process hook
processor.postProcess(this, walEdit, walSyncSuccessful);
- } catch (IOException e) {
- throw e;
} finally {
closeRegionOperation();
if (!mutations.isEmpty() &&
@@ -5621,8 +5660,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
}
}
if (cell.getTagsLength() > 0) {
- Iterator<Tag> i = CellUtil.tagsIterator(cell.getTagsArray(), cell.getTagsOffset(),
- cell.getTagsLength());
+ Iterator<Tag> i = CellUtil.tagsIterator(cell.getTagsArray(),
+ cell.getTagsOffset(), cell.getTagsLength());
while (i.hasNext()) {
newTags.add(i.next());
}
@@ -6534,7 +6573,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
}
/**
- * A mocked list implementaion - discards all updates.
+ * A mocked list implementation - discards all updates.
*/
private static final List<Cell> MOCKED_LIST = new AbstractList<Cell>() {
@@ -6785,7 +6824,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { //
}
/**
- * Explictly sync wal
+ * Explicitly sync wal
* @throws IOException
*/
public void syncWal() throws IOException {
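To make the new write path concrete, here is a hedged usage sketch (not from the patch) of loading one pre-built HFile into an open HRegion. The region handle and the staging path are assumptions; with this change applied, any call that loads at least one hfile also appends and syncs an HBASE::BULK_LOAD marker to the WAL from the finally block shown above.

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.hbase.regionserver.HRegion;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.util.Pair;

    class BulkLoadCallerSketch {
      // "region" is an already-open HRegion; the path is a made-up staging location.
      static boolean loadOneFile(HRegion region) throws IOException {
        List<Pair<byte[], String>> familyPaths = new ArrayList<Pair<byte[], String>>();
        familyPaths.add(new Pair<byte[], String>(Bytes.toBytes("family1"),
            "/staging/family1/hfile-00042"));
        // assignSeqId=true forces a flush so the files sort after all prior edits.
        return region.bulkLoadHFiles(familyPaths, true);
      }
    }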
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f442773/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java
index 05cead2..39d0536 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.codec.Codec;
import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor;
@@ -90,6 +91,7 @@ public class WALEdit implements Writable, HeapSize {
static final byte[] COMPACTION = Bytes.toBytes("HBASE::COMPACTION");
static final byte [] FLUSH = Bytes.toBytes("HBASE::FLUSH");
static final byte [] REGION_EVENT = Bytes.toBytes("HBASE::REGION_EVENT");
+ public static final byte [] BULK_LOAD = Bytes.toBytes("HBASE::BULK_LOAD");
private final int VERSION_2 = -1;
private final boolean isReplay;
@@ -294,7 +296,7 @@ public class WALEdit implements Writable, HeapSize {
}
/**
- * Create a compacion WALEdit
+ * Create a compaction WALEdit
* @param c
* @return A WALEdit that has <code>c</code> serialized as its value
*/
@@ -326,4 +328,33 @@ public class WALEdit implements Writable, HeapSize {
}
return null;
}
-}
+
+ /**
+ * Create a bulk load WALEdit
+ *
+ * @param hri The HRegionInfo for the region in which we are bulk loading
+ * @param bulkLoadDescriptor The descriptor for the bulk load
+ * @return The WALEdit for the bulk load
+ */
+ public static WALEdit createBulkLoadEvent(HRegionInfo hri,
+ WALProtos.BulkLoadDescriptor bulkLoadDescriptor) {
+ KeyValue kv = new KeyValue(getRowForRegion(hri),
+ METAFAMILY,
+ BULK_LOAD,
+ EnvironmentEdgeManager.currentTime(),
+ bulkLoadDescriptor.toByteArray());
+ return new WALEdit().add(kv);
+ }
+
+ /**
+ * Deserializes and returns a BulkLoadDescriptor from the passed-in Cell
+ * @param cell the key value
+ * @return deserialized BulkLoadDescriptor or null.
+ */
+ public static WALProtos.BulkLoadDescriptor getBulkLoadDescriptor(Cell cell) throws IOException {
+ if (CellUtil.matchingColumn(cell, METAFAMILY, BULK_LOAD)) {
+ return WALProtos.BulkLoadDescriptor.parseFrom(cell.getValue());
+ }
+ return null;
+ }
+}
\ No newline at end of file
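A sketch of the consuming side (again, not in the patch): a reader that picks bulk load markers out of a replayed WALEdit using the new getBulkLoadDescriptor() helper plus protoc's generated accessors for the message (getStoresList, getStoreHomeDir, getStoreFileList, getBulkloadSeqNum).

    import java.io.IOException;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
    import org.apache.hadoop.hbase.regionserver.wal.WALEdit;

    class BulkLoadMarkerReaderSketch {
      static void logBulkLoads(WALEdit edit) throws IOException {
        for (Cell cell : edit.getCells()) {
          // Returns null for anything that is not a METAFAMILY:HBASE::BULK_LOAD cell.
          WALProtos.BulkLoadDescriptor desc = WALEdit.getBulkLoadDescriptor(cell);
          if (desc == null) {
            continue;
          }
          for (WALProtos.StoreDescriptor store : desc.getStoresList()) {
            System.out.println("bulk load into " + store.getStoreHomeDir()
                + " files=" + store.getStoreFileList()
                + " seqNum=" + desc.getBulkloadSeqNum());
          }
        }
      }
    }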
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f442773/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALUtil.java
index 5f00643..94ef072 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALUtil.java
@@ -20,14 +20,17 @@
package org.apache.hadoop.hbase.regionserver.wal;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor;
@@ -97,5 +100,41 @@ public class WALUtil {
}
return trx;
}
+
+ /**
+ * Write a log marker that a bulk load has succeeded and is about to be committed.
+ *
+ * @param wal The log to write into.
+ * @param htd A description of the table that we are bulk loading into.
+ * @param info A description of the region in the table that we are bulk loading into.
+ * @param descriptor A protocol buffers based description of the client's bulk loading request
+ * @param sequenceId The current sequenceId in the log at the time when we were to write the
+ * bulk load marker.
+ * @return txid of this transaction or if nothing to do, the last txid
+ * @throws IOException We will throw an IOException if we cannot append to the WAL.
+ */
+ public static long writeBulkLoadMarkerAndSync(final WAL wal,
+ final HTableDescriptor htd,
+ final HRegionInfo info,
+ final WALProtos.BulkLoadDescriptor descriptor,
+ final AtomicLong sequenceId) throws IOException {
+ TableName tn = info.getTable();
+ WALKey key = new HLogKey(info.getEncodedNameAsBytes(), tn);
+
+ // Append the edit to the log; the 'false' argument specifies that it does not need to be
+ // added to the memstore
+ long trx = wal.append(htd,
+ info,
+ key,
+ WALEdit.createBulkLoadEvent(info, descriptor),
+ sequenceId,
+ false,
+ new ArrayList<Cell>());
+ wal.sync(trx);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Appended Bulk Load marker " + TextFormat.shortDebugString(descriptor));
+ }
+ return trx;
+ }
}
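HRegion.bulkLoadHFiles is the only caller the patch adds, but used directly the helper looks roughly like the following sketch; wal, htd, hri and the region's sequenceId counter are assumed to come from an already-open region, and the descriptor from ProtobufUtil.toBulkLoadDescriptor as in HRegion.

    import java.io.IOException;
    import java.util.concurrent.atomic.AtomicLong;
    import org.apache.hadoop.hbase.HRegionInfo;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.protobuf.generated.WALProtos.BulkLoadDescriptor;
    import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
    import org.apache.hadoop.hbase.wal.WAL;

    class BulkLoadMarkerWriterSketch {
      static long markBulkLoad(WAL wal, HTableDescriptor htd, HRegionInfo hri,
          BulkLoadDescriptor desc, AtomicLong sequenceId) throws IOException {
        // The helper both appends and syncs, so once it returns the record of the
        // loaded hfiles is durable; that is why HRegion aborts the server when
        // this append fails after files have already been moved into place.
        return WALUtil.writeBulkLoadMarkerAndSync(wal, htd, hri, desc, sequenceId);
      }
    }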
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f442773/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestBulkLoad.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestBulkLoad.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestBulkLoad.java
new file mode 100644
index 0000000..15dbef5
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestBulkLoad.java
@@ -0,0 +1,312 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.BulkLoadDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.StoreDescriptor;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALKey;
+import org.hamcrest.Description;
+import org.hamcrest.Matcher;
+import org.hamcrest.TypeSafeMatcher;
+import org.jmock.Expectations;
+import org.jmock.integration.junit4.JUnitRuleMockery;
+import org.jmock.lib.concurrent.Synchroniser;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TemporaryFolder;
+import org.junit.rules.TestName;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * This class unit tests bulk HFile loading and the WAL markers it writes.
+ */
+@Category(SmallTests.class)
+public class TestBulkLoad {
+
+ @ClassRule
+ public static TemporaryFolder testFolder = new TemporaryFolder();
+ @Rule
+ public final JUnitRuleMockery context = new JUnitRuleMockery() {{
+ setThreadingPolicy(new Synchroniser());
+ }};
+ private final WAL log = context.mock(WAL.class);
+ private final Configuration conf = HBaseConfiguration.create();
+ private final Random random = new Random();
+ private final byte[] randomBytes = new byte[100];
+ private final byte[] family1 = Bytes.toBytes("family1");
+ private final byte[] family2 = Bytes.toBytes("family2");
+ private final Expectations callOnce;
+ @Rule
+ public TestName name = new TestName();
+
+ public TestBulkLoad() throws IOException {
+ callOnce = new Expectations() {
+ {
+ oneOf(log).append(with(any(HTableDescriptor.class)), with(any(HRegionInfo.class)),
+ with(any(WALKey.class)), with(bulkLogWalEditType(WALEdit.BULK_LOAD)),
+ with(any(AtomicLong.class)), with(any(boolean.class)), with(any(List.class)));
+ will(returnValue(0L));
+ oneOf(log).sync(with(any(long.class)));
+ }
+ };
+ }
+
+ @Before
+ public void before() throws IOException {
+ random.nextBytes(randomBytes);
+ }
+
+ @Test
+ public void verifyBulkLoadEvent() throws IOException {
+ TableName tableName = TableName.valueOf("test", "test");
+ List<Pair<byte[], String>> familyPaths = withFamilyPathsFor(family1);
+ byte[] familyName = familyPaths.get(0).getFirst();
+ String storeFileName = familyPaths.get(0).getSecond();
+ storeFileName = (new Path(storeFileName)).getName();
+ List<String> storeFileNames = new ArrayList<String>();
+ storeFileNames.add(storeFileName);
+ final Matcher<WALEdit> bulkEventMatcher = bulkLogWalEdit(WALEdit.BULK_LOAD,
+ tableName.toBytes(), familyName, storeFileNames);
+ Expectations expectation = new Expectations() {
+ {
+ oneOf(log).append(with(any(HTableDescriptor.class)), with(any(HRegionInfo.class)),
+ with(any(WALKey.class)), with(bulkEventMatcher),
+ with(any(AtomicLong.class)), with(any(boolean.class)), with(any(List.class)));
+ will(returnValue(0L));
+ oneOf(log).sync(with(any(long.class)));
+ }
+ };
+ context.checking(expectation);
+ testRegionWithFamiliesAndSpecifiedTableName(tableName, family1)
+ .bulkLoadHFiles(familyPaths, false);
+ }
+
+ @Test
+ public void bulkHLogShouldThrowNoErrorAndWriteMarkerWithBlankInput() throws IOException {
+ testRegionWithFamilies(family1).bulkLoadHFiles(new ArrayList<Pair<byte[], String>>(), false);
+ }
+
+ @Test
+ public void shouldBulkLoadSingleFamilyHLog() throws IOException {
+ context.checking(callOnce);
+ testRegionWithFamilies(family1).bulkLoadHFiles(withFamilyPathsFor(family1), false);
+ }
+
+ @Test
+ public void shouldBulkLoadManyFamilyHLog() throws IOException {
+ context.checking(callOnce);
+ testRegionWithFamilies(family1, family2).bulkLoadHFiles(withFamilyPathsFor(family1, family2),
+ false);
+ }
+
+ @Test
+ public void shouldBulkLoadManyFamilyHLogEvenWhenTableNameNamespaceSpecified() throws IOException {
+ context.checking(callOnce);
+ TableName tableName = TableName.valueOf("test", "test");
+ testRegionWithFamiliesAndSpecifiedTableName(tableName, family1, family2)
+ .bulkLoadHFiles(withFamilyPathsFor(family1, family2), false);
+ }
+
+ @Test(expected = DoNotRetryIOException.class)
+ public void shouldCrashIfBulkLoadFamiliesNotInTable() throws IOException {
+ testRegionWithFamilies(family1).bulkLoadHFiles(withFamilyPathsFor(family1, family2), false);
+ }
+
+ @Test(expected = DoNotRetryIOException.class)
+ public void bulkHLogShouldThrowErrorWhenFamilySpecifiedAndHFileExistsButNotInTableDescriptor()
+ throws IOException {
+ testRegionWithFamilies().bulkLoadHFiles(withFamilyPathsFor(family1), false);
+ }
+
+ @Test(expected = DoNotRetryIOException.class)
+ public void shouldThrowErrorIfBadFamilySpecifiedAsFamilyPath() throws IOException {
+ testRegionWithFamilies()
+ .bulkLoadHFiles(asList(withInvalidColumnFamilyButProperHFileLocation(family1)),
+ false);
+ }
+
+ @Test(expected = FileNotFoundException.class)
+ public void shouldThrowErrorIfHFileDoesNotExist() throws IOException {
+ List<Pair<byte[], String>> list = asList(withMissingHFileForFamily(family1));
+ testRegionWithFamilies(family1).bulkLoadHFiles(list, false);
+ }
+
+ private Pair<byte[], String> withMissingHFileForFamily(byte[] family) {
+ return new Pair<byte[], String>(family, "/tmp/does_not_exist");
+ }
+
+ private Pair<byte[], String> withInvalidColumnFamilyButProperHFileLocation(byte[] family)
+ throws IOException {
+ // Create a real HFile and return its location paired with a bogus family name,
+ // so only the invalid family (not a missing file) can trigger the failure.
+ String hFilePath = createHFileForFamilies(family);
+ return new Pair<byte[], String>(new byte[]{0x00, 0x01, 0x02}, hFilePath);
+ }
+
+
+ private HRegion testRegionWithFamiliesAndSpecifiedTableName(TableName tableName,
+ byte[]... families)
+ throws IOException {
+ HRegionInfo hRegionInfo = new HRegionInfo(tableName);
+ HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName);
+ for (byte[] family : families) {
+ hTableDescriptor.addFamily(new HColumnDescriptor(family));
+ }
+
+ // TODO We need a way to do this without creating files
+ return HRegion.createHRegion(hRegionInfo,
+ new Path(testFolder.newFolder().toURI()),
+ conf,
+ hTableDescriptor,
+ log);
+
+ }
+
+ private HRegion testRegionWithFamilies(byte[]... families) throws IOException {
+ TableName tableName = TableName.valueOf(name.getMethodName());
+ return testRegionWithFamiliesAndSpecifiedTableName(tableName, families);
+ }
+
+ private List<Pair<byte[], String>> getBlankFamilyPaths() {
+ return new ArrayList<Pair<byte[], String>>();
+ }
+
+ private List<Pair<byte[], String>> withFamilyPathsFor(byte[]... families) throws IOException {
+ List<Pair<byte[], String>> familyPaths = getBlankFamilyPaths();
+ for (byte[] family : families) {
+ familyPaths.add(new Pair<byte[], String>(family, createHFileForFamilies(family)));
+ }
+ return familyPaths;
+ }
+
+ private String createHFileForFamilies(byte[] family) throws IOException {
+ HFile.WriterFactory hFileFactory = HFile.getWriterFactoryNoCache(conf);
+ // TODO We need a way to do this without creating files
+ File hFileLocation = testFolder.newFile();
+ hFileFactory.withOutputStream(new FSDataOutputStream(new FileOutputStream(hFileLocation)));
+ hFileFactory.withFileContext(new HFileContext());
+ HFile.Writer writer = hFileFactory.create();
+
+ writer.append(new KeyValue(CellUtil.createCell(randomBytes,
+ family,
+ randomBytes,
+ 0L,
+ KeyValue.Type.Put.getCode(),
+ randomBytes)));
+ writer.close();
+ return hFileLocation.getAbsoluteFile().getAbsolutePath();
+ }
+
+ private static Matcher<WALEdit> bulkLogWalEditType(byte[] typeBytes) {
+ return new WalMatcher(typeBytes);
+ }
+
+ private static Matcher<WALEdit> bulkLogWalEdit(byte[] typeBytes, byte[] tableName,
+ byte[] familyName, List<String> storeFileNames) {
+ return new WalMatcher(typeBytes, tableName, familyName, storeFileNames);
+ }
+
+ private static class WalMatcher extends TypeSafeMatcher<WALEdit> {
+ private final byte[] typeBytes;
+ private final byte[] tableName;
+ private final byte[] familyName;
+ private final List<String> storeFileNames;
+
+ public WalMatcher(byte[] typeBytes) {
+ this(typeBytes, null, null, null);
+ }
+
+ public WalMatcher(byte[] typeBytes, byte[] tableName, byte[] familyName,
+ List<String> storeFileNames) {
+ this.typeBytes = typeBytes;
+ this.tableName = tableName;
+ this.familyName = familyName;
+ this.storeFileNames = storeFileNames;
+ }
+
+ @Override
+ protected boolean matchesSafely(WALEdit item) {
+ assertTrue(Arrays.equals(item.getCells().get(0).getQualifier(), typeBytes));
+ BulkLoadDescriptor desc;
+ try {
+ desc = WALEdit.getBulkLoadDescriptor(item.getCells().get(0));
+ } catch (IOException e) {
+ return false;
+ }
+ assertNotNull(desc);
+
+ if (tableName != null) {
+ assertTrue(Bytes.equals(ProtobufUtil.toTableName(desc.getTableName()).getName(),
+ tableName));
+ }
+
+ if (storeFileNames != null) {
+ int index = 0;
+ StoreDescriptor store = desc.getStores(0);
+ assertTrue(Bytes.equals(store.getFamilyName().toByteArray(), familyName));
+ assertTrue(Bytes.equals(Bytes.toBytes(store.getStoreHomeDir()), familyName));
+ assertEquals(storeFileNames.size(), store.getStoreFileCount());
+ for (String storeFile : store.getStoreFileList()) {
+ assertTrue(storeFile.equals(storeFileNames.get(index++)));
+ }
+ }
+
+ return true;
+ }
+
+ @Override
+ public void describeTo(Description description) {
+ description.appendText("a WALEdit containing a bulk load marker");
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f442773/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index 8b41594..ea06346 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -124,7 +124,7 @@ import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.FlushAction;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor;
-import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor.StoreDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.StoreDescriptor;
import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl;
import org.apache.hadoop.hbase.regionserver.HRegion.RowLock;
import org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem;
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f442773/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java
index d2bfb52..af49556 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java
@@ -17,27 +17,24 @@
*/
package org.apache.hadoop.hbase.regionserver;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.atomic.AtomicLong;
-
+import com.google.common.collect.Lists;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.MultithreadedTestUtil.RepeatingTestThread;
import org.apache.hadoop.hbase.MultithreadedTestUtil.TestContext;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HConnection;
-import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.RegionServerCallable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
@@ -55,15 +52,28 @@ import org.apache.hadoop.hbase.protobuf.RequestConverter;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CompactRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
+import org.apache.hadoop.hbase.regionserver.wal.TestWALActionsListener;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALKey;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+
/**
* Tests bulk loading of HFiles and shows the atomicity or lack of atomicity of
* the region server's bulkLoad functionality.
@@ -290,7 +300,11 @@ public class TestHRegionServerBulkLoad {
UTIL.startMiniCluster(1);
try {
+ WAL log = UTIL.getHBaseCluster().getRegionServer(0).getWAL(null);
+ FindBulkHBaseListener listener = new FindBulkHBaseListener();
+ log.registerWALActionsListener(listener);
runAtomicBulkloadTest(TABLE_NAME, millisToRun, numScanners);
+ assertThat(listener.isFound(), is(true));
} finally {
UTIL.shutdownMiniCluster();
}
@@ -346,5 +360,25 @@ public class TestHRegionServerBulkLoad {
UTIL = new HBaseTestingUtility(c);
}
+ static class FindBulkHBaseListener extends TestWALActionsListener.DummyWALActionsListener {
+ private boolean found = false;
+
+ @Override
+ public void visitLogEntryBeforeWrite(HTableDescriptor htd, WALKey logKey, WALEdit logEdit) {
+ for (Cell cell : logEdit.getCells()) {
+ KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
+ for (Map.Entry<String, Object> entry : kv.toStringMap().entrySet()) {
+ if (entry.getValue().equals(Bytes.toString(WALEdit.BULK_LOAD))) {
+ found = true;
+ }
+ }
+ }
+ }
+
+ public boolean isFound() {
+ return found;
+ }
+ }
}
+
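As an aside, the listener above detects the marker by scanning the KeyValue's string map for the BULK_LOAD qualifier text; a more direct check, sketched here under the assumption that WALEdit.METAFAMILY keeps its current public visibility, compares the cell's column the same way WALEdit.getBulkLoadDescriptor does:

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.regionserver.wal.WALEdit;

    class BulkLoadMarkerCheckSketch {
      static boolean isBulkLoadMarker(Cell cell) {
        // HBASE::BULK_LOAD markers always carry the METAFAMILY family.
        return CellUtil.matchingColumn(cell, WALEdit.METAFAMILY, WALEdit.BULK_LOAD);
      }
    }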
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f442773/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALActionsListener.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALActionsListener.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALActionsListener.java
index c8629d0..25c83a8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALActionsListener.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALActionsListener.java
@@ -125,7 +125,7 @@ public class TestWALActionsListener {
/**
* Just counts when methods are called
*/
- static class DummyWALActionsListener extends WALActionsListener.Base {
+ public static class DummyWALActionsListener extends WALActionsListener.Base {
public int preLogRollCounter = 0;
public int postLogRollCounter = 0;
public int closedCount = 0;
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f442773/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 57e23c1..975f1f8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1034,6 +1034,7 @@
<jetty.version>6.1.26</jetty.version>
<jetty.jspapi.version>6.1.14</jetty.jspapi.version>
<jersey.version>1.9</jersey.version>
+ <jmock-junit4.version>2.6.0</jmock-junit4.version>
<jruby.version>1.6.8</jruby.version>
<junit.version>4.11</junit.version>
<hamcrest.version>1.3</hamcrest.version>
@@ -1540,6 +1541,18 @@
<artifactId>disruptor</artifactId>
<version>${disruptor.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.jmock</groupId>
+ <artifactId>jmock-junit4</artifactId>
+ <version>${jmock-junit4.version}</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <artifactId>junit-dep</artifactId>
+ <groupId>junit</groupId>
+ </exclusion>
+ </exclusions>
+ </dependency>
</dependencies>
</dependencyManagement>
<!-- Dependencies needed by subprojects -->
@@ -1563,6 +1576,10 @@
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.jmock</groupId>
+ <artifactId>jmock-junit4</artifactId>
+ </dependency>
</dependencies>
<!--
To publish, use the following settings.xml file ( placed in ~/.m2/settings.xml )