Posted to commits@hive.apache.org by ga...@apache.org on 2015/07/01 00:01:07 UTC
[1/3] hive git commit: HIVE-10165 Improve hive-hcatalog-streaming
extensibility and support updates and deletes (Eliot West via gates)
Repository: hive
Updated Branches:
refs/heads/master 3991dba30 -> 994d98c09
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/TestMutations.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/TestMutations.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/TestMutations.java
new file mode 100644
index 0000000..703cef6
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/TestMutations.java
@@ -0,0 +1,544 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hcatalog.streaming.mutate;
+
+import static org.apache.hive.hcatalog.streaming.TransactionBatch.TxnState.ABORTED;
+import static org.apache.hive.hcatalog.streaming.TransactionBatch.TxnState.COMMITTED;
+import static org.apache.hive.hcatalog.streaming.mutate.StreamingTestUtils.databaseBuilder;
+import static org.apache.hive.hcatalog.streaming.mutate.StreamingTestUtils.tableBuilder;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hive.hcatalog.streaming.TestStreaming;
+import org.apache.hive.hcatalog.streaming.mutate.StreamingAssert.Factory;
+import org.apache.hive.hcatalog.streaming.mutate.StreamingAssert.Record;
+import org.apache.hive.hcatalog.streaming.mutate.StreamingTestUtils.TableBuilder;
+import org.apache.hive.hcatalog.streaming.mutate.client.AcidTable;
+import org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient;
+import org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder;
+import org.apache.hive.hcatalog.streaming.mutate.client.Transaction;
+import org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver;
+import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator;
+import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder;
+import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+/**
+ * This test is based on {@link TestStreaming} and has a similar core set of tests to ensure that basic transactional
+ * behaviour is as expected when writing through the mutation API ({@link MutatorCoordinator}). This is complemented
+ * with a set of tests covering the update and delete operations.
+ */
+public class TestMutations {
+
+ private static final List<String> EUROPE_FRANCE = Arrays.asList("Europe", "France");
+ private static final List<String> EUROPE_UK = Arrays.asList("Europe", "UK");
+ private static final List<String> ASIA_INDIA = Arrays.asList("Asia", "India");
+ // Index of the 'id' column, on which records are bucketed
+ private static final int[] BUCKET_COLUMN_INDEXES = new int[] { 0 };
+ // Index of the ROW__ID field within MutableRecord
+ private static final int RECORD_ID_COLUMN = 2;
+
+ @Rule
+ public TemporaryFolder warehouseFolder = new TemporaryFolder();
+
+ private StreamingTestUtils testUtils = new StreamingTestUtils();
+ private HiveConf conf;
+ private IMetaStoreClient metaStoreClient;
+ private String metaStoreUri;
+ private Database database;
+ private TableBuilder partitionedTableBuilder;
+ private TableBuilder unpartitionedTableBuilder;
+ private Factory assertionFactory;
+
+ public TestMutations() throws Exception {
+ conf = testUtils.newHiveConf(metaStoreUri);
+ testUtils.prepareTransactionDatabase(conf);
+ metaStoreClient = testUtils.newMetaStoreClient(conf);
+ assertionFactory = new StreamingAssert.Factory(metaStoreClient, conf);
+ }
+
+ @Before
+ public void setup() throws Exception {
+ database = databaseBuilder(warehouseFolder.getRoot()).name("testing").dropAndCreate(metaStoreClient);
+
+ partitionedTableBuilder = tableBuilder(database)
+ .name("partitioned")
+ .addColumn("id", "int")
+ .addColumn("msg", "string")
+ .partitionKeys("continent", "country");
+
+ unpartitionedTableBuilder = tableBuilder(database)
+ .name("unpartitioned")
+ .addColumn("id", "int")
+ .addColumn("msg", "string");
+ }
+
+ @Test
+ public void testTransactionBatchEmptyCommitPartitioned() throws Exception {
+ Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
+
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), true)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction transaction = client.newTransaction();
+
+ transaction.begin();
+
+ transaction.commit();
+ assertThat(transaction.getState(), is(COMMITTED));
+ client.close();
+ }
+
+ @Test
+ public void testTransactionBatchEmptyCommitUnpartitioned() throws Exception {
+ Table table = unpartitionedTableBuilder.create(metaStoreClient);
+
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), false)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction transaction = client.newTransaction();
+
+ transaction.begin();
+
+ transaction.commit();
+ assertThat(transaction.getState(), is(COMMITTED));
+ client.close();
+ }
+
+ @Test
+ public void testTransactionBatchEmptyAbortPartitioned() throws Exception {
+ Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
+
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), true)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction transaction = client.newTransaction();
+
+ List<AcidTable> destinations = client.getTables();
+
+ transaction.begin();
+
+ MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN,
+ BUCKET_COLUMN_INDEXES);
+ MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
+ .metaStoreUri(metaStoreUri)
+ .table(destinations.get(0))
+ .mutatorFactory(mutatorFactory)
+ .build();
+
+ coordinator.close();
+
+ transaction.abort();
+ assertThat(transaction.getState(), is(ABORTED));
+ client.close();
+ }
+
+ @Test
+ public void testTransactionBatchEmptyAbortUnpartitioned() throws Exception {
+ Table table = unpartitionedTableBuilder.create(metaStoreClient);
+
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), false)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction transaction = client.newTransaction();
+
+ List<AcidTable> destinations = client.getTables();
+
+ transaction.begin();
+
+ MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN,
+ BUCKET_COLUMN_INDEXES);
+ MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
+ .metaStoreUri(metaStoreUri)
+ .table(destinations.get(0))
+ .mutatorFactory(mutatorFactory)
+ .build();
+
+ coordinator.close();
+
+ transaction.abort();
+ assertThat(transaction.getState(), is(ABORTED));
+ client.close();
+ }
+
+ @Test
+ public void testTransactionBatchCommitPartitioned() throws Exception {
+ Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
+
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), true)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction transaction = client.newTransaction();
+
+ List<AcidTable> destinations = client.getTables();
+
+ transaction.begin();
+
+ MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN,
+ BUCKET_COLUMN_INDEXES);
+ MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
+ .metaStoreUri(metaStoreUri)
+ .table(destinations.get(0))
+ .mutatorFactory(mutatorFactory)
+ .build();
+
+ BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
+ MutableRecord record = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1,
+ "Hello streaming"));
+ coordinator.insert(ASIA_INDIA, record);
+ coordinator.close();
+
+ transaction.commit();
+
+ StreamingAssert streamingAssertions = assertionFactory.newStreamingAssert(table, ASIA_INDIA);
+ streamingAssertions.assertMinTransactionId(1L);
+ streamingAssertions.assertMaxTransactionId(1L);
+ streamingAssertions.assertExpectedFileCount(1);
+
+ List<Record> readRecords = streamingAssertions.readRecords();
+ assertThat(readRecords.size(), is(1));
+ assertThat(readRecords.get(0).getRow(), is("{1, Hello streaming}"));
+ assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));
+
+ assertThat(transaction.getState(), is(COMMITTED));
+ client.close();
+ }
+
+ @Test
+ public void testMulti() throws Exception {
+ Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
+
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), true)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction transaction = client.newTransaction();
+
+ List<AcidTable> destinations = client.getTables();
+
+ transaction.begin();
+
+ MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN,
+ BUCKET_COLUMN_INDEXES);
+ MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
+ .metaStoreUri(metaStoreUri)
+ .table(destinations.get(0))
+ .mutatorFactory(mutatorFactory)
+ .build();
+
+ BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
+ MutableRecord asiaIndiaRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1,
+ "Hello streaming"));
+ MutableRecord europeUkRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(2,
+ "Hello streaming"));
+ MutableRecord europeFranceRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(3,
+ "Hello streaming"));
+ MutableRecord europeFranceRecord2 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(4,
+ "Bonjour streaming"));
+
+ coordinator.insert(ASIA_INDIA, asiaIndiaRecord1);
+ coordinator.insert(EUROPE_UK, europeUkRecord1);
+ coordinator.insert(EUROPE_FRANCE, europeFranceRecord1);
+ coordinator.insert(EUROPE_FRANCE, europeFranceRecord2);
+ coordinator.close();
+
+ transaction.commit();
+
+ // ASIA_INDIA
+ StreamingAssert streamingAssertions = assertionFactory.newStreamingAssert(table, ASIA_INDIA);
+ streamingAssertions.assertMinTransactionId(1L);
+ streamingAssertions.assertMaxTransactionId(1L);
+ streamingAssertions.assertExpectedFileCount(1);
+
+ List<Record> readRecords = streamingAssertions.readRecords();
+ assertThat(readRecords.size(), is(1));
+ assertThat(readRecords.get(0).getRow(), is("{1, Hello streaming}"));
+ assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));
+
+ // EUROPE_UK
+ streamingAssertions = assertionFactory.newStreamingAssert(table, EUROPE_UK);
+ streamingAssertions.assertMinTransactionId(1L);
+ streamingAssertions.assertMaxTransactionId(1L);
+ streamingAssertions.assertExpectedFileCount(1);
+
+ readRecords = streamingAssertions.readRecords();
+ assertThat(readRecords.size(), is(1));
+ assertThat(readRecords.get(0).getRow(), is("{2, Hello streaming}"));
+ assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));
+
+ // EUROPE_FRANCE
+ streamingAssertions = assertionFactory.newStreamingAssert(table, EUROPE_FRANCE);
+ streamingAssertions.assertMinTransactionId(1L);
+ streamingAssertions.assertMaxTransactionId(1L);
+ streamingAssertions.assertExpectedFileCount(1);
+
+ readRecords = streamingAssertions.readRecords();
+ assertThat(readRecords.size(), is(2));
+ assertThat(readRecords.get(0).getRow(), is("{3, Hello streaming}"));
+ assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));
+ assertThat(readRecords.get(1).getRow(), is("{4, Bonjour streaming}"));
+ assertThat(readRecords.get(1).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 1L)));
+
+ client.close();
+ }
+
+ @Test
+ public void testTransactionBatchCommitUnpartitioned() throws Exception {
+ Table table = unpartitionedTableBuilder.create(metaStoreClient);
+
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), false)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction transaction = client.newTransaction();
+
+ List<AcidTable> destinations = client.getTables();
+
+ transaction.begin();
+
+ MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN,
+ BUCKET_COLUMN_INDEXES);
+ MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
+ .metaStoreUri(metaStoreUri)
+ .table(destinations.get(0))
+ .mutatorFactory(mutatorFactory)
+ .build();
+
+ BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
+ MutableRecord record = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1,
+ "Hello streaming"));
+
+ coordinator.insert(Collections.<String> emptyList(), record);
+ coordinator.close();
+
+ transaction.commit();
+
+ StreamingAssert streamingAssertions = assertionFactory.newStreamingAssert(table);
+ streamingAssertions.assertMinTransactionId(1L);
+ streamingAssertions.assertMaxTransactionId(1L);
+ streamingAssertions.assertExpectedFileCount(1);
+
+ List<Record> readRecords = streamingAssertions.readRecords();
+ assertThat(readRecords.size(), is(1));
+ assertThat(readRecords.get(0).getRow(), is("{1, Hello streaming}"));
+ assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));
+
+ assertThat(transaction.getState(), is(COMMITTED));
+ client.close();
+ }
+
+ @Test
+ public void testTransactionBatchAbort() throws Exception {
+ Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
+
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), true)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction transaction = client.newTransaction();
+
+ List<AcidTable> destinations = client.getTables();
+
+ transaction.begin();
+
+ MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN,
+ BUCKET_COLUMN_INDEXES);
+ MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
+ .metaStoreUri(metaStoreUri)
+ .table(destinations.get(0))
+ .mutatorFactory(mutatorFactory)
+ .build();
+
+ BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
+ MutableRecord record1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1,
+ "Hello streaming"));
+ MutableRecord record2 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(2,
+ "Welcome to streaming"));
+
+ coordinator.insert(ASIA_INDIA, record1);
+ coordinator.insert(ASIA_INDIA, record2);
+ coordinator.close();
+
+ transaction.abort();
+
+ assertThat(transaction.getState(), is(ABORTED));
+
+ client.close();
+
+ StreamingAssert streamingAssertions = assertionFactory.newStreamingAssert(table, ASIA_INDIA);
+ streamingAssertions.assertNothingWritten();
+ }
+
+ @Test
+ public void testUpdatesAndDeletes() throws Exception {
+ // Set up some base data, then stream some inserts/updates/deletes to a number of partitions.
+ MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN,
+ BUCKET_COLUMN_INDEXES);
+
+ // INSERT DATA
+ //
+ Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).addPartition(EUROPE_FRANCE).create(metaStoreClient);
+
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), true)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction insertTransaction = client.newTransaction();
+
+ List<AcidTable> destinations = client.getTables();
+
+ insertTransaction.begin();
+
+ MutatorCoordinator insertCoordinator = new MutatorCoordinatorBuilder()
+ .metaStoreUri(metaStoreUri)
+ .table(destinations.get(0))
+ .mutatorFactory(mutatorFactory)
+ .build();
+
+ BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
+ MutableRecord asiaIndiaRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1,
+ "Namaste streaming 1"));
+ MutableRecord asiaIndiaRecord2 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(2,
+ "Namaste streaming 2"));
+ MutableRecord europeUkRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(3,
+ "Hello streaming 1"));
+ MutableRecord europeUkRecord2 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(4,
+ "Hello streaming 2"));
+ MutableRecord europeFranceRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(5,
+ "Bonjour streaming 1"));
+ MutableRecord europeFranceRecord2 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(6,
+ "Bonjour streaming 2"));
+
+ insertCoordinator.insert(ASIA_INDIA, asiaIndiaRecord1);
+ insertCoordinator.insert(ASIA_INDIA, asiaIndiaRecord2);
+ insertCoordinator.insert(EUROPE_UK, europeUkRecord1);
+ insertCoordinator.insert(EUROPE_UK, europeUkRecord2);
+ insertCoordinator.insert(EUROPE_FRANCE, europeFranceRecord1);
+ insertCoordinator.insert(EUROPE_FRANCE, europeFranceRecord2);
+ insertCoordinator.close();
+
+ insertTransaction.commit();
+
+ assertThat(insertTransaction.getState(), is(COMMITTED));
+ client.close();
+
+ // MUTATE DATA
+ //
+ client = new MutatorClientBuilder()
+ .addSinkTable(table.getDbName(), table.getTableName(), true)
+ .metaStoreUri(metaStoreUri)
+ .build();
+ client.connect();
+
+ Transaction mutateTransaction = client.newTransaction();
+
+ destinations = client.getTables();
+
+ mutateTransaction.begin();
+
+ MutatorCoordinator mutateCoordinator = new MutatorCoordinatorBuilder()
+ .metaStoreUri(metaStoreUri)
+ .table(destinations.get(0))
+ .mutatorFactory(mutatorFactory)
+ .build();
+
+ bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
+ MutableRecord asiaIndiaRecord3 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(20,
+ "Namaste streaming 3"));
+
+ mutateCoordinator.update(ASIA_INDIA, new MutableRecord(2, "UPDATED: Namaste streaming 2", new RecordIdentifier(1L,
+ 0, 1L)));
+ mutateCoordinator.insert(ASIA_INDIA, asiaIndiaRecord3);
+ mutateCoordinator.delete(EUROPE_UK, new MutableRecord(3, "Hello streaming 1", new RecordIdentifier(1L, 0, 0L)));
+ mutateCoordinator.delete(EUROPE_FRANCE,
+ new MutableRecord(5, "Bonjour streaming 1", new RecordIdentifier(1L, 0, 0L)));
+ mutateCoordinator.update(EUROPE_FRANCE, new MutableRecord(6, "UPDATED: Bonjour streaming 2", new RecordIdentifier(
+ 1L, 0, 1L)));
+ mutateCoordinator.close();
+
+ mutateTransaction.commit();
+
+ assertThat(mutateTransaction.getState(), is(COMMITTED));
+
+ StreamingAssert indiaAssertions = assertionFactory.newStreamingAssert(table, ASIA_INDIA);
+ indiaAssertions.assertMinTransactionId(1L);
+ indiaAssertions.assertMaxTransactionId(2L);
+ List<Record> indiaRecords = indiaAssertions.readRecords();
+ assertThat(indiaRecords.size(), is(3));
+ assertThat(indiaRecords.get(0).getRow(), is("{1, Namaste streaming 1}"));
+ assertThat(indiaRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));
+ assertThat(indiaRecords.get(1).getRow(), is("{2, UPDATED: Namaste streaming 2}"));
+ assertThat(indiaRecords.get(1).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 1L)));
+ assertThat(indiaRecords.get(2).getRow(), is("{20, Namaste streaming 3}"));
+ assertThat(indiaRecords.get(2).getRecordIdentifier(), is(new RecordIdentifier(2L, 0, 0L)));
+
+ StreamingAssert ukAssertions = assertionFactory.newStreamingAssert(table, EUROPE_UK);
+ ukAssertions.assertMinTransactionId(1L);
+ ukAssertions.assertMaxTransactionId(2L);
+ List<Record> ukRecords = ukAssertions.readRecords();
+ assertThat(ukRecords.size(), is(1));
+ assertThat(ukRecords.get(0).getRow(), is("{4, Hello streaming 2}"));
+ assertThat(ukRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 1L)));
+
+ StreamingAssert franceAssertions = assertionFactory.newStreamingAssert(table, EUROPE_FRANCE);
+ franceAssertions.assertMinTransactionId(1L);
+ franceAssertions.assertMaxTransactionId(2L);
+ List<Record> franceRecords = franceAssertions.readRecords();
+ assertThat(franceRecords.size(), is(1));
+ assertThat(franceRecords.get(0).getRow(), is("{6, UPDATED: Bonjour streaming 2}"));
+ assertThat(franceRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 1L)));
+
+ client.close();
+ }
+
+}
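
The tests above all exercise the same client lifecycle: connect, open a transaction, write mutations grouped by partition and bucket through a coordinator, then commit or abort. A minimal sketch of that flow, assuming a HiveConf 'conf' and a 'metaStoreUri' configured as in the tests (ReflectiveMutatorFactory and MutableRecord are test helpers from this package):

    MutatorClient client = new MutatorClientBuilder()
        .addSinkTable("testing", "partitioned", true) // true => create missing partitions
        .metaStoreUri(metaStoreUri)
        .build();
    client.connect();

    Transaction transaction = client.newTransaction();
    transaction.begin();

    AcidTable sink = client.getTables().get(0);
    MutatorFactory factory = new ReflectiveMutatorFactory(conf, MutableRecord.class,
        RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
    MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
        .metaStoreUri(metaStoreUri)
        .table(sink)
        .mutatorFactory(factory)
        .build();

    // Records must have a bucket id attached before they can be written.
    BucketIdResolver resolver = factory.newBucketIdResolver(sink.getTotalBuckets());
    MutableRecord record = (MutableRecord) resolver.attachBucketIdToRecord(
        new MutableRecord(1, "Hello streaming"));
    coordinator.insert(Arrays.asList("Asia", "India"), record);

    coordinator.close();  // always close the coordinator before resolving the transaction
    transaction.commit(); // or transaction.abort()
    client.close();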
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestAcidTableSerializer.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestAcidTableSerializer.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestAcidTableSerializer.java
new file mode 100644
index 0000000..706697a
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestAcidTableSerializer.java
@@ -0,0 +1,66 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.junit.Assert.assertThat;
+
+import java.io.File;
+
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hive.hcatalog.streaming.mutate.StreamingTestUtils;
+import org.junit.Test;
+
+public class TestAcidTableSerializer {
+
+ @Test
+ public void testSerializeDeserialize() throws Exception {
+ Database database = StreamingTestUtils.databaseBuilder(new File("/tmp")).name("db_1").build();
+ Table table = StreamingTestUtils
+ .tableBuilder(database)
+ .name("table_1")
+ .addColumn("one", "string")
+ .addColumn("two", "integer")
+ .partitionKeys("partition")
+ .addPartition("p1")
+ .buckets(10)
+ .build();
+
+ AcidTable acidTable = new AcidTable("db_1", "table_1", true, TableType.SINK);
+ acidTable.setTable(table);
+ acidTable.setTransactionId(42L);
+
+ String encoded = AcidTableSerializer.encode(acidTable);
+ AcidTable decoded = AcidTableSerializer.decode(encoded);
+
+ assertThat(decoded.getDatabaseName(), is("db_1"));
+ assertThat(decoded.getTableName(), is("table_1"));
+ assertThat(decoded.createPartitions(), is(true));
+ assertThat(decoded.getOutputFormatName(), is("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"));
+ assertThat(decoded.getTotalBuckets(), is(10));
+ assertThat(decoded.getQualifiedName(), is("DB_1.TABLE_1"));
+ assertThat(decoded.getTransactionId(), is(42L));
+ assertThat(decoded.getTableType(), is(TableType.SINK));
+ assertThat(decoded.getTable(), is(table));
+ }
+
+ @Test
+ public void testSerializeDeserializeNoTableNoTransaction() throws Exception {
+ AcidTable acidTable = new AcidTable("db_1", "table_1", true, TableType.SINK);
+
+ String encoded = AcidTableSerializer.encode(acidTable);
+ AcidTable decoded = AcidTableSerializer.decode(encoded);
+
+ assertThat(decoded.getDatabaseName(), is("db_1"));
+ assertThat(decoded.getTableName(), is("table_1"));
+ assertThat(decoded.createPartitions(), is(true));
+ assertThat(decoded.getOutputFormatName(), is(nullValue()));
+ assertThat(decoded.getTotalBuckets(), is(0));
+ assertThat(decoded.getQualifiedName(), is("DB_1.TABLE_1"));
+ assertThat(decoded.getTransactionId(), is(0L));
+ assertThat(decoded.getTableType(), is(TableType.SINK));
+ assertThat(decoded.getTable(), is(nullValue()));
+ }
+
+}
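
The round-trips above exist because an AcidTable is configured where the transaction is opened but consumed by remote workers; encoding it as a String lets callers ship it through any job configuration mechanism. A sketch of that hand-off, with the property name being purely illustrative:

    // Client side: serialize the table descriptor after connect()/newTransaction().
    String encoded = AcidTableSerializer.encode(acidTable);
    jobConf.set("example.acid.table", encoded);

    // Worker side: rebuild it and pass it to a MutatorCoordinatorBuilder.
    AcidTable decoded = AcidTableSerializer.decode(jobConf.get("example.acid.table"));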
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestMutatorClient.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestMutatorClient.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestMutatorClient.java
new file mode 100644
index 0000000..ca3f7b2
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestMutatorClient.java
@@ -0,0 +1,176 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
+import org.apache.hive.hcatalog.streaming.TransactionBatch.TxnState;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.Lock;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.LockFailureListener;
+import org.apache.thrift.TException;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+@RunWith(MockitoJUnitRunner.class)
+public class TestMutatorClient {
+
+ private static final long TRANSACTION_ID = 42L;
+ private static final String TABLE_NAME_1 = "TABLE_1";
+ private static final String TABLE_NAME_2 = "TABLE_2";
+ private static final String DB_NAME = "DB_1";
+ private static final String USER = "user";
+ private static final AcidTable TABLE_1 = new AcidTable(DB_NAME, TABLE_NAME_1, true, TableType.SINK);
+ private static final AcidTable TABLE_2 = new AcidTable(DB_NAME, TABLE_NAME_2, true, TableType.SINK);
+
+ @Mock
+ private IMetaStoreClient mockMetaStoreClient;
+ @Mock
+ private Lock mockLock;
+ @Mock
+ private Table mockTable1, mockTable2;
+ @Mock
+ private StorageDescriptor mockSd;
+ @Mock
+ private Map<String, String> mockParameters;
+ @Mock
+ private HiveConf mockConfiguration;
+ @Mock
+ private LockFailureListener mockLockFailureListener;
+
+ private MutatorClient client;
+
+ @Before
+ public void configureMocks() throws Exception {
+ when(mockMetaStoreClient.getTable(DB_NAME, TABLE_NAME_1)).thenReturn(mockTable1);
+ when(mockTable1.getDbName()).thenReturn(DB_NAME);
+ when(mockTable1.getTableName()).thenReturn(TABLE_NAME_1);
+ when(mockTable1.getSd()).thenReturn(mockSd);
+ when(mockTable1.getParameters()).thenReturn(mockParameters);
+ when(mockMetaStoreClient.getTable(DB_NAME, TABLE_NAME_2)).thenReturn(mockTable2);
+ when(mockTable2.getDbName()).thenReturn(DB_NAME);
+ when(mockTable2.getTableName()).thenReturn(TABLE_NAME_2);
+ when(mockTable2.getSd()).thenReturn(mockSd);
+ when(mockTable2.getParameters()).thenReturn(mockParameters);
+ when(mockSd.getNumBuckets()).thenReturn(1, 2);
+ when(mockSd.getOutputFormat()).thenReturn(OrcOutputFormat.class.getName());
+ when(mockParameters.get("transactional")).thenReturn(Boolean.TRUE.toString());
+
+ when(mockMetaStoreClient.openTxn(USER)).thenReturn(TRANSACTION_ID);
+
+ client = new MutatorClient(mockMetaStoreClient, mockConfiguration, mockLockFailureListener, USER,
+ Collections.singletonList(TABLE_1));
+ }
+
+ @Test
+ public void testCheckValidTableConnect() throws Exception {
+ List<AcidTable> inTables = new ArrayList<>();
+ inTables.add(TABLE_1);
+ inTables.add(TABLE_2);
+ client = new MutatorClient(mockMetaStoreClient, mockConfiguration, mockLockFailureListener, USER, inTables);
+
+ client.connect();
+ List<AcidTable> outTables = client.getTables();
+
+ assertThat(client.isConnected(), is(true));
+ assertThat(outTables.size(), is(2));
+ assertThat(outTables.get(0).getDatabaseName(), is(DB_NAME));
+ assertThat(outTables.get(0).getTableName(), is(TABLE_NAME_1));
+ assertThat(outTables.get(0).getTotalBuckets(), is(2));
+ assertThat(outTables.get(0).getOutputFormatName(), is(OrcOutputFormat.class.getName()));
+ assertThat(outTables.get(0).getTransactionId(), is(0L));
+ assertThat(outTables.get(0).getTable(), is(mockTable1));
+ assertThat(outTables.get(1).getDatabaseName(), is(DB_NAME));
+ assertThat(outTables.get(1).getTableName(), is(TABLE_NAME_2));
+ assertThat(outTables.get(1).getTotalBuckets(), is(2));
+ assertThat(outTables.get(1).getOutputFormatName(), is(OrcOutputFormat.class.getName()));
+ assertThat(outTables.get(1).getTransactionId(), is(0L));
+ assertThat(outTables.get(1).getTable(), is(mockTable2));
+ }
+
+ @Test
+ public void testCheckNonTransactionalTableConnect() throws Exception {
+ when(mockParameters.get("transactional")).thenReturn(Boolean.FALSE.toString());
+
+ try {
+ client.connect();
+ fail();
+ } catch (ConnectionException e) {
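+ // Expected: connect() must reject a table that is not marked transactional.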
+ }
+
+ assertThat(client.isConnected(), is(false));
+ }
+
+ @Test
+ public void testCheckUnBucketedTableConnect() throws Exception {
+ when(mockSd.getNumBuckets()).thenReturn(0);
+
+ try {
+ client.connect();
+ fail();
+ } catch (ConnectionException e) {
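+ // Expected: connect() must reject a table with no bucket columns.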
+ }
+
+ assertThat(client.isConnected(), is(false));
+ }
+
+ @Test
+ public void testMetaStoreFailsOnConnect() throws Exception {
+ when(mockMetaStoreClient.getTable(anyString(), anyString())).thenThrow(new TException());
+
+ try {
+ client.connect();
+ fail();
+ } catch (ConnectionException e) {
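+ // Expected: the meta store failure surfaces as a ConnectionException.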
+ }
+
+ assertThat(client.isConnected(), is(false));
+ }
+
+ @Test(expected = ConnectionException.class)
+ public void testGetDestinationsFailsIfNotConnected() throws Exception {
+ client.getTables();
+ }
+
+ @Test
+ public void testNewTransaction() throws Exception {
+ List<AcidTable> inTables = new ArrayList<>();
+ inTables.add(TABLE_1);
+ inTables.add(TABLE_2);
+ client = new MutatorClient(mockMetaStoreClient, mockConfiguration, mockLockFailureListener, USER, inTables);
+
+ client.connect();
+ Transaction transaction = client.newTransaction();
+ List<AcidTable> outTables = client.getTables();
+
+ assertThat(client.isConnected(), is(true));
+
+ assertThat(transaction.getTransactionId(), is(TRANSACTION_ID));
+ assertThat(transaction.getState(), is(TxnState.INACTIVE));
+ assertThat(outTables.get(0).getTransactionId(), is(TRANSACTION_ID));
+ assertThat(outTables.get(1).getTransactionId(), is(TRANSACTION_ID));
+ }
+
+ @Test
+ public void testCloseClosesClient() throws Exception {
+ client.close();
+ assertThat(client.isConnected(), is(false));
+ verify(mockMetaStoreClient).close();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestTransaction.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestTransaction.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestTransaction.java
new file mode 100644
index 0000000..179207a
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/TestTransaction.java
@@ -0,0 +1,95 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hive.hcatalog.streaming.TransactionBatch;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.Lock;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.LockException;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+@RunWith(MockitoJUnitRunner.class)
+public class TestTransaction {
+
+ private static final String USER = "user";
+ private static final long TRANSACTION_ID = 10L;
+
+ @Mock
+ private Lock mockLock;
+ @Mock
+ private IMetaStoreClient mockMetaStoreClient;
+
+ private Transaction transaction;
+
+ @Before
+ public void createTransaction() throws Exception {
+ when(mockLock.getUser()).thenReturn(USER);
+ when(mockMetaStoreClient.openTxn(USER)).thenReturn(TRANSACTION_ID);
+ transaction = new Transaction(mockMetaStoreClient, mockLock);
+ }
+
+ @Test
+ public void testInitialState() {
+ assertThat(transaction.getState(), is(TransactionBatch.TxnState.INACTIVE));
+ assertThat(transaction.getTransactionId(), is(TRANSACTION_ID));
+ }
+
+ @Test
+ public void testBegin() throws Exception {
+ transaction.begin();
+
+ verify(mockLock).acquire(TRANSACTION_ID);
+ assertThat(transaction.getState(), is(TransactionBatch.TxnState.OPEN));
+ }
+
+ @Test
+ public void testBeginLockFails() throws Exception {
+ doThrow(new LockException("")).when(mockLock).acquire(TRANSACTION_ID);
+
+ try {
+ transaction.begin();
+ } catch (TransactionException ignore) {
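+ // Expected: a lock failure aborts begin(), leaving the transaction INACTIVE.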
+ }
+
+ assertThat(transaction.getState(), is(TransactionBatch.TxnState.INACTIVE));
+ }
+
+ @Test
+ public void testCommit() throws Exception {
+ transaction.commit();
+
+ verify(mockLock).release();
+ verify(mockMetaStoreClient).commitTxn(TRANSACTION_ID);
+ assertThat(transaction.getState(), is(TransactionBatch.TxnState.COMMITTED));
+ }
+
+ @Test(expected = TransactionException.class)
+ public void testCommitLockFails() throws Exception {
+ doThrow(new LockException("")).when(mockLock).release();
+ transaction.commit();
+ }
+
+ @Test
+ public void testAbort() throws Exception {
+ transaction.abort();
+
+ verify(mockLock).release();
+ verify(mockMetaStoreClient).rollbackTxn(TRANSACTION_ID);
+ assertThat(transaction.getState(), is(TransactionBatch.TxnState.ABORTED));
+ }
+
+ @Test(expected = TransactionException.class)
+ public void testAbortLockFails() throws Exception {
+ doThrow(new LockException("")).when(mockLock).release();
+ transaction.abort();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestHeartbeatTimerTask.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestHeartbeatTimerTask.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestHeartbeatTimerTask.java
new file mode 100644
index 0000000..8e6d06e
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestHeartbeatTimerTask.java
@@ -0,0 +1,100 @@
+package org.apache.hive.hcatalog.streaming.mutate.client.lock;
+
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.verify;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
+import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
+import org.apache.thrift.TException;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+@RunWith(MockitoJUnitRunner.class)
+public class TestHeartbeatTimerTask {
+
+ private static final long TRANSACTION_ID = 10L;
+ private static final long LOCK_ID = 1L;
+ private static final List<Table> TABLES = createTable();
+
+ @Mock
+ private IMetaStoreClient mockMetaStoreClient;
+ @Mock
+ private LockFailureListener mockListener;
+
+ private HeartbeatTimerTask task;
+
+ @Before
+ public void create() throws Exception {
+ task = new HeartbeatTimerTask(mockMetaStoreClient, mockListener, TRANSACTION_ID, TABLES, LOCK_ID);
+ }
+
+ @Test
+ public void testRun() throws Exception {
+ task.run();
+
+ verify(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
+ }
+
+ @Test
+ public void testRunNullTransactionId() throws Exception {
+ task = new HeartbeatTimerTask(mockMetaStoreClient, mockListener, null, TABLES, LOCK_ID);
+
+ task.run();
+
+ verify(mockMetaStoreClient).heartbeat(0, LOCK_ID);
+ }
+
+ @Test
+ public void testRunHeartbeatFailsNoSuchLockException() throws Exception {
+ NoSuchLockException exception = new NoSuchLockException();
+ doThrow(exception).when(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
+
+ task.run();
+
+ verify(mockListener).lockFailed(LOCK_ID, TRANSACTION_ID, Arrays.asList("DB.TABLE"), exception);
+ }
+
+ @Test
+ public void testRunHeartbeatFailsNoSuchTxnException() throws Exception {
+ NoSuchTxnException exception = new NoSuchTxnException();
+ doThrow(exception).when(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
+
+ task.run();
+
+ verify(mockListener).lockFailed(LOCK_ID, TRANSACTION_ID, Arrays.asList("DB.TABLE"), exception);
+ }
+
+ @Test
+ public void testRunHeartbeatFailsTxnAbortedException() throws Exception {
+ TxnAbortedException exception = new TxnAbortedException();
+ doThrow(exception).when(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
+
+ task.run();
+
+ verify(mockListener).lockFailed(LOCK_ID, TRANSACTION_ID, Arrays.asList("DB.TABLE"), exception);
+ }
+
+ @Test
+ public void testRunHeartbeatFailsTException() throws Exception {
+ TException exception = new TException();
+ doThrow(exception).when(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
+
+ task.run();
+ }
+
+ private static List<Table> createTable() {
+ Table table = new Table();
+ table.setDbName("DB");
+ table.setTableName("TABLE");
+ return Arrays.asList(table);
+ }
+}
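
These tests invoke HeartbeatTimerTask.run() directly. In normal operation the task is scheduled to fire repeatedly so that the lock and transaction are renewed before hive.txn.timeout expires; TestLock below shows a HeartbeatFactory wiring the task to a java.util.Timer for exactly this purpose. A sketch, assuming HeartbeatTimerTask extends java.util.TimerTask (implied by its use with Timer):

    Timer heartbeatTimer = new Timer("hive-lock-heartbeat", true /* daemon */);
    heartbeatTimer.schedule(
        new HeartbeatTimerTask(metaStoreClient, listener, transactionId, tables, lockId),
        periodMillis, periodMillis);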
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java
new file mode 100644
index 0000000..ef1e80c
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java
@@ -0,0 +1,283 @@
+package org.apache.hive.hcatalog.streaming.mutate.client.lock;
+
+import static org.apache.hadoop.hive.metastore.api.LockState.ABORT;
+import static org.apache.hadoop.hive.metastore.api.LockState.ACQUIRED;
+import static org.apache.hadoop.hive.metastore.api.LockState.NOT_ACQUIRED;
+import static org.apache.hadoop.hive.metastore.api.LockState.WAITING;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Matchers.anyLong;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.verifyZeroInteractions;
+import static org.mockito.Mockito.when;
+
+import java.net.InetAddress;
+import java.util.Collection;
+import java.util.List;
+import java.util.Timer;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.LockComponent;
+import org.apache.hadoop.hive.metastore.api.LockLevel;
+import org.apache.hadoop.hive.metastore.api.LockRequest;
+import org.apache.hadoop.hive.metastore.api.LockResponse;
+import org.apache.hadoop.hive.metastore.api.LockType;
+import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
+import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
+import org.apache.thrift.TException;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Captor;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import com.google.common.collect.ImmutableList;
+
+@RunWith(MockitoJUnitRunner.class)
+public class TestLock {
+
+ private static final Table TABLE_1 = createTable("DB", "ONE");
+ private static final Table TABLE_2 = createTable("DB", "TWO");
+ private static final List<Table> TABLES = ImmutableList.of(TABLE_1, TABLE_2);
+ private static final long LOCK_ID = 42;
+ private static final long TRANSACTION_ID = 109;
+ private static final String USER = "ewest";
+
+ @Mock
+ private IMetaStoreClient mockMetaStoreClient;
+ @Mock
+ private LockFailureListener mockListener;
+ @Mock
+ private LockResponse mockLockResponse;
+ @Mock
+ private HeartbeatFactory mockHeartbeatFactory;
+ @Mock
+ private Timer mockHeartbeat;
+ @Captor
+ private ArgumentCaptor<LockRequest> requestCaptor;
+
+ private Lock lock;
+ private HiveConf configuration = new HiveConf();
+
+ @Before
+ public void injectMocks() throws Exception {
+ when(mockMetaStoreClient.lock(any(LockRequest.class))).thenReturn(mockLockResponse);
+ when(mockLockResponse.getLockid()).thenReturn(LOCK_ID);
+ when(mockLockResponse.getState()).thenReturn(ACQUIRED);
+ when(
+ mockHeartbeatFactory.newInstance(any(IMetaStoreClient.class), any(LockFailureListener.class), any(Long.class),
+ any(Collection.class), anyLong(), anyInt())).thenReturn(mockHeartbeat);
+
+ lock = new Lock(mockMetaStoreClient, mockHeartbeatFactory, configuration, mockListener, USER, TABLES, 3, 0);
+ }
+
+ @Test
+ public void testAcquireReadLockWithNoIssues() throws Exception {
+ lock.acquire();
+ assertEquals(Long.valueOf(LOCK_ID), lock.getLockId());
+ assertNull(lock.getTransactionId());
+ }
+
+ @Test
+ public void testAcquireTxnLockWithNoIssues() throws Exception {
+ lock.acquire(TRANSACTION_ID);
+ assertEquals(Long.valueOf(LOCK_ID), lock.getLockId());
+ assertEquals(Long.valueOf(TRANSACTION_ID), lock.getTransactionId());
+ }
+
+ @Test
+ public void testAcquireReadLockCheckHeartbeatCreated() throws Exception {
+ configuration.set("hive.txn.timeout", "100s");
+ lock.acquire();
+
+ verify(mockHeartbeatFactory).newInstance(eq(mockMetaStoreClient), eq(mockListener), any(Long.class), eq(TABLES),
+ eq(LOCK_ID), eq(75));
+ }
+
+ @Test
+ public void testAcquireTxnLockCheckHeartbeatCreated() throws Exception {
+ configuration.set("hive.txn.timeout", "100s");
+ lock.acquire(TRANSACTION_ID);
+
+ verify(mockHeartbeatFactory).newInstance(eq(mockMetaStoreClient), eq(mockListener), eq(TRANSACTION_ID), eq(TABLES),
+ eq(LOCK_ID), eq(75));
+ }
+
+ @Test
+ public void testAcquireLockCheckUser() throws Exception {
+ lock.acquire();
+ verify(mockMetaStoreClient).lock(requestCaptor.capture());
+ LockRequest actualRequest = requestCaptor.getValue();
+ assertEquals(USER, actualRequest.getUser());
+ }
+
+ @Test
+ public void testAcquireReadLockCheckLocks() throws Exception {
+ lock.acquire();
+ verify(mockMetaStoreClient).lock(requestCaptor.capture());
+
+ LockRequest request = requestCaptor.getValue();
+ assertEquals(0, request.getTxnid());
+ assertEquals(USER, request.getUser());
+ assertEquals(InetAddress.getLocalHost().getHostName(), request.getHostname());
+
+ List<LockComponent> components = request.getComponent();
+
+ assertEquals(2, components.size());
+
+ LockComponent expected1 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB");
+ expected1.setTablename("ONE");
+ assertTrue(components.contains(expected1));
+
+ LockComponent expected2 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB");
+ expected2.setTablename("TWO");
+ assertTrue(components.contains(expected2));
+ }
+
+ @Test
+ public void testAcquireTxnLockCheckLocks() throws Exception {
+ lock.acquire(TRANSACTION_ID);
+ verify(mockMetaStoreClient).lock(requestCaptor.capture());
+
+ LockRequest request = requestCaptor.getValue();
+ assertEquals(TRANSACTION_ID, request.getTxnid());
+ assertEquals(USER, request.getUser());
+ assertEquals(InetAddress.getLocalHost().getHostName(), request.getHostname());
+
+ List<LockComponent> components = request.getComponent();
+
+ assertEquals(2, components.size());
+
+ LockComponent expected1 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB");
+ expected1.setTablename("ONE");
+ assertTrue(components.contains(expected1));
+
+ LockComponent expected2 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB");
+ expected2.setTablename("TWO");
+ assertTrue(components.contains(expected2));
+ }
+
+ @Test(expected = LockException.class)
+ public void testAcquireLockNotAcquired() throws Exception {
+ when(mockLockResponse.getState()).thenReturn(NOT_ACQUIRED);
+ lock.acquire();
+ }
+
+ @Test(expected = LockException.class)
+ public void testAcquireLockAborted() throws Exception {
+ when(mockLockResponse.getState()).thenReturn(ABORT);
+ lock.acquire();
+ }
+
+ @Test(expected = LockException.class)
+ public void testAcquireLockWithWaitRetriesExceeded() throws Exception {
+ when(mockLockResponse.getState()).thenReturn(WAITING, WAITING, WAITING);
+ lock.acquire();
+ }
+
+ @Test
+ public void testAcquireLockWithWaitRetries() throws Exception {
+ when(mockLockResponse.getState()).thenReturn(WAITING, WAITING, ACQUIRED);
+ lock.acquire();
+ assertEquals(Long.valueOf(LOCK_ID), lock.getLockId());
+ }
+
+ @Test
+ public void testReleaseLock() throws Exception {
+ lock.acquire();
+ lock.release();
+ verify(mockMetaStoreClient).unlock(LOCK_ID);
+ }
+
+ @Test
+ public void testReleaseLockNoLock() throws Exception {
+ lock.release();
+ verifyNoMoreInteractions(mockMetaStoreClient);
+ }
+
+ @Test
+ public void testReleaseLockCancelsHeartbeat() throws Exception {
+ lock.acquire();
+ lock.release();
+ verify(mockHeartbeat).cancel();
+ }
+
+ @Test
+ public void testReadHeartbeat() throws Exception {
+ HeartbeatTimerTask task = new HeartbeatTimerTask(mockMetaStoreClient, mockListener, null, TABLES, LOCK_ID);
+ task.run();
+ verify(mockMetaStoreClient).heartbeat(0, LOCK_ID);
+ }
+
+ @Test
+ public void testTxnHeartbeat() throws Exception {
+ HeartbeatTimerTask task = new HeartbeatTimerTask(mockMetaStoreClient, mockListener, TRANSACTION_ID, TABLES, LOCK_ID);
+ task.run();
+ verify(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
+ }
+
+ @Test
+ public void testReadHeartbeatFailsNoSuchLockException() throws Exception {
+ Throwable t = new NoSuchLockException();
+ doThrow(t).when(mockMetaStoreClient).heartbeat(0, LOCK_ID);
+ HeartbeatTimerTask task = new HeartbeatTimerTask(mockMetaStoreClient, mockListener, null, TABLES, LOCK_ID);
+ task.run();
+ verify(mockListener).lockFailed(LOCK_ID, null, Lock.asStrings(TABLES), t);
+ }
+
+ @Test
+ public void testTxnHeartbeatFailsNoSuchLockException() throws Exception {
+ Throwable t = new NoSuchLockException();
+ doThrow(t).when(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
+ HeartbeatTimerTask task = new HeartbeatTimerTask(mockMetaStoreClient, mockListener, TRANSACTION_ID, TABLES, LOCK_ID);
+ task.run();
+ verify(mockListener).lockFailed(LOCK_ID, TRANSACTION_ID, Lock.asStrings(TABLES), t);
+ }
+
+ @Test
+ public void testHeartbeatFailsNoSuchTxnException() throws Exception {
+ Throwable t = new NoSuchTxnException();
+ doThrow(t).when(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
+ HeartbeatTimerTask task = new HeartbeatTimerTask(mockMetaStoreClient, mockListener, TRANSACTION_ID, TABLES, LOCK_ID);
+ task.run();
+ verify(mockListener).lockFailed(LOCK_ID, TRANSACTION_ID, Lock.asStrings(TABLES), t);
+ }
+
+ @Test
+ public void testHeartbeatFailsTxnAbortedException() throws Exception {
+ Throwable t = new TxnAbortedException();
+ doThrow(t).when(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
+ HeartbeatTimerTask task = new HeartbeatTimerTask(mockMetaStoreClient, mockListener, TRANSACTION_ID, TABLES, LOCK_ID);
+ task.run();
+ verify(mockListener).lockFailed(LOCK_ID, TRANSACTION_ID, Lock.asStrings(TABLES), t);
+ }
+
+ @Test
+ public void testHeartbeatContinuesTException() throws Exception {
+ Throwable t = new TException();
+ doThrow(t).when(mockMetaStoreClient).heartbeat(0, LOCK_ID);
+ HeartbeatTimerTask task = new HeartbeatTimerTask(mockMetaStoreClient, mockListener, TRANSACTION_ID, TABLES, LOCK_ID);
+ task.run();
+ verifyZeroInteractions(mockListener);
+ }
+
+ private static Table createTable(String databaseName, String tableName) {
+ Table table = new Table();
+ table.setDbName(databaseName);
+ table.setTableName(tableName);
+ return table;
+ }
+
+}
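
Note the heartbeat period asserted in testAcquireReadLockCheckHeartbeatCreated and testAcquireTxnLockCheckHeartbeatCreated: with hive.txn.timeout set to 100s the HeartbeatFactory is asked for a period of 75, i.e. the heartbeat appears to fire at 75% of the transaction timeout (100s x 0.75 = 75s) so that a renewal always lands before the lock or transaction can expire.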
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java
new file mode 100644
index 0000000..f81373e
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java
@@ -0,0 +1,38 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hive.hcatalog.streaming.mutate.MutableRecord;
+import org.junit.Test;
+
+public class TestBucketIdResolverImpl {
+
+ private static final int TOTAL_BUCKETS = 12;
+ private static final int RECORD_ID_COLUMN = 2;
+ // id - TODO: use a non-zero index to check for offset errors.
+ private static final int[] BUCKET_COLUMN_INDEXES = new int[] { 0 };
+
+ private BucketIdResolver capturingBucketIdResolver = new BucketIdResolverImpl(
+ ObjectInspectorFactory.getReflectionObjectInspector(MutableRecord.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA), RECORD_ID_COLUMN, TOTAL_BUCKETS, BUCKET_COLUMN_INDEXES);
+
+ @Test
+ public void testAttachBucketIdToRecord() {
+ MutableRecord record = new MutableRecord(1, "hello");
+ capturingBucketIdResolver.attachBucketIdToRecord(record);
+ assertThat(record.rowId, is(new RecordIdentifier(-1L, 8, -1L)));
+ assertThat(record.id, is(1));
+ assertThat(record.msg.toString(), is("hello"));
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testNoBucketColumns() {
+ new BucketIdResolverImpl(ObjectInspectorFactory.getReflectionObjectInspector(MutableRecord.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA), RECORD_ID_COLUMN, TOTAL_BUCKETS, new int[0]);
+
+ }
+
+}
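
The expected RecordIdentifier(-1, 8, -1) above shows that attachBucketIdToRecord assigns only the bucket id (8 of 12 for id = 1), leaving the transaction id and row id to be filled in at write time. A sketch of the final step of the computation, assuming the conventional Hive hash-then-modulo scheme (the per-column hash itself is implementation-specific):

    // fieldsHash is Hive's hash over the values of the bucketed columns.
    int bucketId = (fieldsHash & Integer.MAX_VALUE) % totalBuckets;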
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestGroupingValidator.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestGroupingValidator.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestGroupingValidator.java
new file mode 100644
index 0000000..74fa59b
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestGroupingValidator.java
@@ -0,0 +1,70 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.junit.Test;
+
+public class TestGroupingValidator {
+
+ private GroupingValidator validator = new GroupingValidator();
+
+ @Test
+ public void uniqueGroups() {
+ assertTrue(validator.isInSequence(Arrays.asList("a", "A"), 1));
+ assertTrue(validator.isInSequence(Arrays.asList("c", "C"), 3));
+ assertTrue(validator.isInSequence(Arrays.asList("b", "B"), 2));
+ }
+
+ @Test
+ public void sameGroup() {
+ assertTrue(validator.isInSequence(Arrays.asList("a", "A"), 1));
+ assertTrue(validator.isInSequence(Arrays.asList("a", "A"), 1));
+ assertTrue(validator.isInSequence(Arrays.asList("a", "A"), 1));
+ }
+
+ @Test
+ public void revisitedGroup() {
+ assertTrue(validator.isInSequence(Arrays.asList("a", "A"), 1));
+ assertTrue(validator.isInSequence(Arrays.asList("c", "C"), 3));
+ assertFalse(validator.isInSequence(Arrays.asList("a", "A"), 1));
+ }
+
+ @Test
+ public void samePartitionDifferentBucket() {
+ assertTrue(validator.isInSequence(Arrays.asList("a", "A"), 1));
+ assertTrue(validator.isInSequence(Arrays.asList("c", "C"), 3));
+ assertTrue(validator.isInSequence(Arrays.asList("a", "A"), 2));
+ }
+
+ @Test
+ public void sameBucketDifferentPartition() {
+ assertTrue(validator.isInSequence(Arrays.asList("a", "A"), 1));
+ assertTrue(validator.isInSequence(Arrays.asList("c", "C"), 3));
+ assertTrue(validator.isInSequence(Arrays.asList("b", "B"), 1));
+ }
+
+ @Test
+ public void uniqueGroupsNoPartition() {
+ assertTrue(validator.isInSequence(Collections.<String> emptyList(), 1));
+ assertTrue(validator.isInSequence(Collections.<String> emptyList(), 3));
+ assertTrue(validator.isInSequence(Collections.<String> emptyList(), 2));
+ }
+
+ @Test
+ public void sameGroupNoPartition() {
+ assertTrue(validator.isInSequence(Collections.<String> emptyList(), 1));
+ assertTrue(validator.isInSequence(Collections.<String> emptyList(), 1));
+ assertTrue(validator.isInSequence(Collections.<String> emptyList(), 1));
+ }
+
+ @Test
+ public void revisitedGroupNoPartition() {
+ assertTrue(validator.isInSequence(Collections.<String> emptyList(), 1));
+ assertTrue(validator.isInSequence(Collections.<String> emptyList(), 3));
+ assertFalse(validator.isInSequence(Collections.<String> emptyList(), 1));
+ }
+}
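
GroupingValidator encodes the coordinator's write contract: records must arrive grouped by (partition, bucket) so that each group is visited at most once, and returning to a previously seen group (revisitedGroup, revisitedGroupNoPartition) is rejected. A caller could use it to verify a stream before handing records to a coordinator:

    GroupingValidator validator = new GroupingValidator();
    if (!validator.isInSequence(partitionValues, bucketId)) {
      throw new IllegalStateException("records must be grouped by partition and bucket");
    }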
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestMutatorCoordinator.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestMutatorCoordinator.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestMutatorCoordinator.java
new file mode 100644
index 0000000..6e9ffa2
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestMutatorCoordinator.java
@@ -0,0 +1,234 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Matchers.anyLong;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyZeroInteractions;
+import static org.mockito.Mockito.when;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
+import org.apache.hive.hcatalog.streaming.mutate.client.AcidTable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+@RunWith(MockitoJUnitRunner.class)
+public class TestMutatorCoordinator {
+
+ private static final List<String> UNPARTITIONED = Collections.<String> emptyList();
+ private static final List<String> PARTITION_B = Arrays.asList("B");
+ private static final List<String> PARTITION_A = Arrays.asList("A");
+ private static final long TRANSACTION_ID = 2L;
+ private static final int BUCKET_ID = 0;
+ private static final Path PATH_A = new Path("X");
+ private static final Path PATH_B = new Path("B");
+ private static final Object RECORD = "RECORD";
+ private static final RecordIdentifier ROW__ID_B0_R0 = new RecordIdentifier(10L, BUCKET_ID, 0L);
+ private static final RecordIdentifier ROW__ID_B0_R1 = new RecordIdentifier(10L, BUCKET_ID, 1L);
+ private static final RecordIdentifier ROW__ID_B1_R0 = new RecordIdentifier(10L, BUCKET_ID + 1, 0L);
+ private static final RecordIdentifier ROW__ID_INSERT = new RecordIdentifier(-1L, BUCKET_ID, -1L);
+
+ @Mock
+ private IMetaStoreClient mockMetaStoreClient;
+ @Mock
+ private MutatorFactory mockMutatorFactory;
+ @Mock
+ private CreatePartitionHelper mockPartitionHelper;
+ @Mock
+ private GroupingValidator mockGroupingValidator;
+ @Mock
+ private SequenceValidator mockSequenceValidator;
+ @Mock
+ private AcidTable mockAcidTable;
+ @Mock
+ private RecordInspector mockRecordInspector;
+ @Mock
+ private BucketIdResolver mockBucketIdResolver;
+ @Mock
+ private Mutator mockMutator;
+
+ private MutatorCoordinator coordinator;
+
+ private HiveConf configuration = new HiveConf();
+
+ @Before
+ public void createCoordinator() throws Exception {
+ when(mockAcidTable.getOutputFormatName()).thenReturn(OrcOutputFormat.class.getName());
+ when(mockAcidTable.getTotalBuckets()).thenReturn(1);
+ when(mockAcidTable.getTransactionId()).thenReturn(TRANSACTION_ID);
+ when(mockAcidTable.createPartitions()).thenReturn(true);
+ when(mockMutatorFactory.newRecordInspector()).thenReturn(mockRecordInspector);
+ when(mockMutatorFactory.newBucketIdResolver(anyInt())).thenReturn(mockBucketIdResolver);
+ when(mockMutatorFactory.newMutator(any(OrcOutputFormat.class), anyLong(), any(Path.class), anyInt())).thenReturn(
+ mockMutator);
+ when(mockPartitionHelper.getPathForPartition(any(List.class))).thenReturn(PATH_A);
+ when(mockRecordInspector.extractRecordIdentifier(RECORD)).thenReturn(ROW__ID_INSERT);
+ when(mockSequenceValidator.isInSequence(any(RecordIdentifier.class))).thenReturn(true);
+ when(mockGroupingValidator.isInSequence(any(List.class), anyInt())).thenReturn(true);
+
+ coordinator = new MutatorCoordinator(mockMetaStoreClient, configuration, mockMutatorFactory, mockPartitionHelper,
+ mockGroupingValidator, mockSequenceValidator, mockAcidTable, false);
+ }
+
+ @Test
+ public void insert() throws Exception {
+ coordinator.insert(UNPARTITIONED, RECORD);
+
+ verify(mockPartitionHelper).createPartitionIfNotExists(UNPARTITIONED);
+ verify(mockMutatorFactory).newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_A), eq(BUCKET_ID));
+ verify(mockMutator).insert(RECORD);
+ }
+
+ @Test
+ public void multipleInserts() throws Exception {
+ coordinator.insert(UNPARTITIONED, RECORD);
+ coordinator.insert(UNPARTITIONED, RECORD);
+ coordinator.insert(UNPARTITIONED, RECORD);
+
+ verify(mockPartitionHelper).createPartitionIfNotExists(UNPARTITIONED);
+ verify(mockMutatorFactory).newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_A), eq(BUCKET_ID));
+ verify(mockMutator, times(3)).insert(RECORD);
+ }
+
+ @Test
+ public void insertPartitionChanges() throws Exception {
+ when(mockPartitionHelper.getPathForPartition(PARTITION_A)).thenReturn(PATH_A);
+ when(mockPartitionHelper.getPathForPartition(PARTITION_B)).thenReturn(PATH_B);
+
+ coordinator.insert(PARTITION_A, RECORD);
+ coordinator.insert(PARTITION_B, RECORD);
+
+ verify(mockPartitionHelper).createPartitionIfNotExists(PARTITION_A);
+ verify(mockPartitionHelper).createPartitionIfNotExists(PARTITION_B);
+ verify(mockMutatorFactory).newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_A), eq(BUCKET_ID));
+ verify(mockMutatorFactory).newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_B), eq(BUCKET_ID));
+ verify(mockMutator, times(2)).insert(RECORD);
+ }
+
+ @Test
+ public void bucketChanges() throws Exception {
+ when(mockRecordInspector.extractRecordIdentifier(RECORD)).thenReturn(ROW__ID_B0_R0, ROW__ID_B1_R0);
+
+ when(mockBucketIdResolver.computeBucketId(RECORD)).thenReturn(0, 1);
+
+ coordinator.update(UNPARTITIONED, RECORD);
+ coordinator.delete(UNPARTITIONED, RECORD);
+
+ verify(mockPartitionHelper).createPartitionIfNotExists(UNPARTITIONED);
+ verify(mockMutatorFactory).newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_A), eq(BUCKET_ID));
+ verify(mockMutatorFactory)
+ .newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_A), eq(BUCKET_ID + 1));
+ verify(mockMutator).update(RECORD);
+ verify(mockMutator).delete(RECORD);
+ }
+
+ @Test
+ public void partitionThenBucketChanges() throws Exception {
+ when(mockRecordInspector.extractRecordIdentifier(RECORD)).thenReturn(ROW__ID_B0_R0, ROW__ID_B0_R1, ROW__ID_B1_R0,
+ ROW__ID_INSERT);
+
+ when(mockBucketIdResolver.computeBucketId(RECORD)).thenReturn(0, 0, 1, 0);
+
+ when(mockPartitionHelper.getPathForPartition(PARTITION_A)).thenReturn(PATH_A);
+ when(mockPartitionHelper.getPathForPartition(PARTITION_B)).thenReturn(PATH_B);
+
+ coordinator.update(PARTITION_A, RECORD);
+ coordinator.delete(PARTITION_B, RECORD);
+ coordinator.update(PARTITION_B, RECORD);
+ coordinator.insert(PARTITION_B, RECORD);
+
+ verify(mockPartitionHelper).createPartitionIfNotExists(PARTITION_A);
+ verify(mockPartitionHelper).createPartitionIfNotExists(PARTITION_B);
+ verify(mockMutatorFactory).newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_A), eq(BUCKET_ID));
+ verify(mockMutatorFactory, times(2)).newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_B),
+ eq(BUCKET_ID));
+ verify(mockMutatorFactory)
+ .newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_B), eq(BUCKET_ID + 1));
+ verify(mockMutator, times(2)).update(RECORD);
+ verify(mockMutator).delete(RECORD);
+ verify(mockMutator).insert(RECORD);
+ verify(mockSequenceValidator, times(4)).reset();
+ }
+
+ @Test(expected = RecordSequenceException.class)
+ public void outOfSequence() throws Exception {
+ when(mockSequenceValidator.isInSequence(any(RecordIdentifier.class))).thenReturn(false);
+
+ coordinator.update(UNPARTITIONED, RECORD);
+ coordinator.delete(UNPARTITIONED, RECORD);
+
+ verify(mockPartitionHelper).createPartitionIfNotExists(UNPARTITIONED);
+ verify(mockMutatorFactory).newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_A), eq(BUCKET_ID));
+ verify(mockMutator).update(RECORD);
+ verify(mockMutator).delete(RECORD);
+ }
+
+ @Test(expected = GroupRevisitedException.class)
+ public void revisitGroup() throws Exception {
+ when(mockGroupingValidator.isInSequence(any(List.class), anyInt())).thenReturn(false);
+
+ coordinator.update(UNPARTITIONED, RECORD);
+ coordinator.delete(UNPARTITIONED, RECORD);
+
+ verify(mockPartitionHelper).createPartitionIfNotExists(UNPARTITIONED);
+ verify(mockMutatorFactory).newMutator(any(OrcOutputFormat.class), eq(TRANSACTION_ID), eq(PATH_A), eq(BUCKET_ID));
+ verify(mockMutator).update(RECORD);
+ verify(mockMutator).delete(RECORD);
+ }
+
+ @Test(expected = BucketIdException.class)
+ public void insertWithBadBucket() throws Exception {
+ when(mockRecordInspector.extractRecordIdentifier(RECORD)).thenReturn(ROW__ID_B0_R0);
+
+ when(mockBucketIdResolver.computeBucketId(RECORD)).thenReturn(1);
+
+ coordinator.insert(UNPARTITIONED, RECORD);
+ }
+
+ @Test(expected = BucketIdException.class)
+ public void updateWithBadBucket() throws Exception {
+ when(mockRecordInspector.extractRecordIdentifier(RECORD)).thenReturn(ROW__ID_B0_R0);
+
+ when(mockBucketIdResolver.computeBucketId(RECORD)).thenReturn(1);
+
+ coordinator.update(UNPARTITIONED, RECORD);
+ }
+
+ @Test
+ public void deleteWithBadBucket() throws Exception {
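+ // Unlike insert and update, delete is not expected to fail on a mismatched bucket id,
+ // so no BucketIdException is declared here.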
+ when(mockRecordInspector.extractRecordIdentifier(RECORD)).thenReturn(ROW__ID_B0_R0);
+
+ when(mockBucketIdResolver.computeBucketId(RECORD)).thenReturn(1);
+
+ coordinator.delete(UNPARTITIONED, RECORD);
+ }
+
+ @Test
+ public void closeNoRecords() throws Exception {
+ coordinator.close();
+
+ // No mutator created
+ verifyZeroInteractions(mockMutator);
+ }
+
+ @Test
+ public void closeUsedCoordinator() throws Exception {
+ coordinator.insert(UNPARTITIONED, RECORD);
+ coordinator.close();
+
+ verify(mockMutator).close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestMutatorImpl.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestMutatorImpl.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestMutatorImpl.java
new file mode 100644
index 0000000..b29c763
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestMutatorImpl.java
@@ -0,0 +1,99 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat.Options;
+import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Captor;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+@RunWith(MockitoJUnitRunner.class)
+public class TestMutatorImpl {
+
+ private static final Object RECORD = new Object();
+ private static final int RECORD_ID_COLUMN = 2;
+ private static final int BUCKET_ID = 0;
+ private static final Path PATH = new Path("X");
+ private static final long TRANSACTION_ID = 1L;
+
+ @Mock
+ private AcidOutputFormat<?, ?> mockOutputFormat;
+ @Mock
+ private ObjectInspector mockObjectInspector;
+ @Mock
+ private RecordUpdater mockRecordUpdater;
+ @Captor
+ private ArgumentCaptor<AcidOutputFormat.Options> captureOptions;
+
+ private final HiveConf configuration = new HiveConf();
+
+ private Mutator mutator;
+
+ @Before
+ public void injectMocks() throws IOException {
+ when(mockOutputFormat.getRecordUpdater(eq(PATH), any(Options.class))).thenReturn(mockRecordUpdater);
+ mutator = new MutatorImpl(configuration, RECORD_ID_COLUMN, mockObjectInspector, mockOutputFormat, TRANSACTION_ID,
+ PATH, BUCKET_ID);
+ }
+
+ @Test
+ public void testCreatesRecordUpdater() throws IOException {
+ verify(mockOutputFormat).getRecordUpdater(eq(PATH), captureOptions.capture());
+ Options options = captureOptions.getValue();
+ assertThat(options.getBucket(), is(BUCKET_ID));
+ assertThat(options.getConfiguration(), is((Configuration) configuration));
+ assertThat(options.getInspector(), is(mockObjectInspector));
+ assertThat(options.getRecordIdColumn(), is(RECORD_ID_COLUMN));
+ assertThat(options.getMinimumTransactionId(), is(TRANSACTION_ID));
+ assertThat(options.getMaximumTransactionId(), is(TRANSACTION_ID));
+ }
+
+ @Test
+ public void testInsertDelegates() throws IOException {
+ mutator.insert(RECORD);
+ verify(mockRecordUpdater).insert(TRANSACTION_ID, RECORD);
+ }
+
+ @Test
+ public void testUpdateDelegates() throws IOException {
+ mutator.update(RECORD);
+ verify(mockRecordUpdater).update(TRANSACTION_ID, RECORD);
+ }
+
+ @Test
+ public void testDeleteDelegates() throws IOException {
+ mutator.delete(RECORD);
+ verify(mockRecordUpdater).delete(TRANSACTION_ID, RECORD);
+ }
+
+ @Test
+ public void testCloseDelegates() throws IOException {
+ mutator.close();
+ verify(mockRecordUpdater).close(false);
+ }
+
+ @Test
+ public void testFlushDoesNothing() throws IOException {
+ mutator.flush();
+ verify(mockRecordUpdater, never()).flush();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestRecordInspectorImpl.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestRecordInspectorImpl.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestRecordInspectorImpl.java
new file mode 100644
index 0000000..389ad33
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestRecordInspectorImpl.java
@@ -0,0 +1,31 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hive.hcatalog.streaming.mutate.MutableRecord;
+import org.junit.Test;
+
+public class TestRecordInspectorImpl {
+
+ private static final int ROW_ID_COLUMN = 2;
+
+ private RecordInspectorImpl inspector = new RecordInspectorImpl(ObjectInspectorFactory.getReflectionObjectInspector(
+ MutableRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA), ROW_ID_COLUMN);
+
+ @Test
+ public void testExtractRecordIdentifier() {
+ RecordIdentifier recordIdentifier = new RecordIdentifier(10L, 4, 20L);
+ MutableRecord record = new MutableRecord(1, "hello", recordIdentifier);
+ assertThat(inspector.extractRecordIdentifier(record), is(recordIdentifier));
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testNotAStructObjectInspector() {
+ new RecordInspectorImpl(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, 2);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestSequenceValidator.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestSequenceValidator.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestSequenceValidator.java
new file mode 100644
index 0000000..33f9606
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestSequenceValidator.java
@@ -0,0 +1,91 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.junit.Test;
+
+public class TestSequenceValidator {
+
+ private static final int BUCKET_ID = 1;
+
+ private SequenceValidator validator = new SequenceValidator();
+
+ @Test
+ public void testSingleInSequence() {
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ }
+
+ @Test
+ public void testRowIdInSequence() {
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 1)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 4)), is(true));
+ }
+
+ @Test
+ public void testTxIdInSequence() {
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(1L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(4L, BUCKET_ID, 0)), is(true));
+ }
+
+ @Test
+ public void testMixedInSequence() {
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 1)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(1L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(1L, BUCKET_ID, 1)), is(true));
+ }
+
+ @Test
+ public void testNegativeTxId() {
+ assertThat(validator.isInSequence(new RecordIdentifier(-1L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ }
+
+ @Test
+ public void testNegativeRowId() {
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, -1)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ }
+
+ @Test
+ public void testRowIdOutOfSequence() {
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 4)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 1)), is(false));
+ }
+
+ @Test
+ public void testReset() {
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 4)), is(true));
+ // New partition for example
+ validator.reset();
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 1)), is(true));
+ }
+
+ @Test
+ public void testTxIdOutOfSequence() {
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(4L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(1L, BUCKET_ID, 0)), is(false));
+ }
+
+ @Test
+ public void testMixedOutOfSequence() {
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 0)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(1L, BUCKET_ID, 4)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(1L, BUCKET_ID, 0)), is(false));
+ assertThat(validator.isInSequence(new RecordIdentifier(1L, BUCKET_ID, 5)), is(true));
+ assertThat(validator.isInSequence(new RecordIdentifier(0L, BUCKET_ID, 6)), is(false));
+ }
+
+ @Test(expected = NullPointerException.class)
+ public void testNullRecordIdentifier() {
+ validator.isInSequence(null);
+ }
+
+}
[3/3] hive git commit: HIVE-10165 Improve hive-hcatalog-streaming
extensibility and support updates and deletes (Eliot West via gates)
Posted by ga...@apache.org.
HIVE-10165 Improve hive-hcatalog-streaming extensibility and support updates and deletes (Eliot West via gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/994d98c0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/994d98c0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/994d98c0
Branch: refs/heads/master
Commit: 994d98c0963ee48c2abbfee6f389d75c0223c8f1
Parents: 3991dba
Author: Alan Gates <ga...@hortonworks.com>
Authored: Tue Jun 30 14:59:55 2015 -0700
Committer: Alan Gates <ga...@hortonworks.com>
Committed: Tue Jun 30 14:59:55 2015 -0700
----------------------------------------------------------------------
.gitignore | 1 +
hcatalog/streaming/pom.xml | 6 +
.../streaming/mutate/HiveConfFactory.java | 63 +++
.../mutate/UgiMetaStoreClientFactory.java | 102 ++++
.../streaming/mutate/client/AcidTable.java | 112 ++++
.../mutate/client/AcidTableSerializer.java | 100 ++++
.../mutate/client/ClientException.java | 15 +
.../mutate/client/ConnectionException.java | 15 +
.../streaming/mutate/client/MutatorClient.java | 140 +++++
.../mutate/client/MutatorClientBuilder.java | 115 ++++
.../streaming/mutate/client/TableType.java | 37 ++
.../streaming/mutate/client/Transaction.java | 114 ++++
.../mutate/client/TransactionException.java | 15 +
.../mutate/client/lock/HeartbeatFactory.java | 30 +
.../mutate/client/lock/HeartbeatTimerTask.java | 66 +++
.../streaming/mutate/client/lock/Lock.java | 282 ++++++++++
.../mutate/client/lock/LockException.java | 15 +
.../mutate/client/lock/LockFailureListener.java | 26 +
.../mutate/doc-files/system-overview.dot | 27 +
.../hive/hcatalog/streaming/mutate/package.html | 495 +++++++++++++++++
.../mutate/worker/BucketIdException.java | 11 +
.../mutate/worker/BucketIdResolver.java | 11 +
.../mutate/worker/BucketIdResolverImpl.java | 76 +++
.../mutate/worker/CreatePartitionHelper.java | 83 +++
.../mutate/worker/GroupRevisitedException.java | 11 +
.../mutate/worker/GroupingValidator.java | 74 +++
.../streaming/mutate/worker/Mutator.java | 21 +
.../mutate/worker/MutatorCoordinator.java | 281 ++++++++++
.../worker/MutatorCoordinatorBuilder.java | 76 +++
.../streaming/mutate/worker/MutatorFactory.java | 16 +
.../streaming/mutate/worker/MutatorImpl.java | 84 +++
.../streaming/mutate/worker/OperationType.java | 7 +
.../worker/PartitionCreationException.java | 15 +
.../mutate/worker/RecordInspector.java | 11 +
.../mutate/worker/RecordInspectorImpl.java | 45 ++
.../mutate/worker/RecordSequenceException.java | 11 +
.../mutate/worker/SequenceValidator.java | 49 ++
.../mutate/worker/WorkerException.java | 15 +
.../streaming/mutate/ExampleUseCase.java | 82 +++
.../streaming/mutate/MutableRecord.java | 50 ++
.../mutate/ReflectiveMutatorFactory.java | 51 ++
.../streaming/mutate/StreamingAssert.java | 191 +++++++
.../streaming/mutate/StreamingTestUtils.java | 261 +++++++++
.../streaming/mutate/TestMutations.java | 544 +++++++++++++++++++
.../mutate/client/TestAcidTableSerializer.java | 66 +++
.../mutate/client/TestMutatorClient.java | 176 ++++++
.../mutate/client/TestTransaction.java | 95 ++++
.../client/lock/TestHeartbeatTimerTask.java | 100 ++++
.../streaming/mutate/client/lock/TestLock.java | 283 ++++++++++
.../mutate/worker/TestBucketIdResolverImpl.java | 38 ++
.../mutate/worker/TestGroupingValidator.java | 70 +++
.../mutate/worker/TestMutatorCoordinator.java | 234 ++++++++
.../mutate/worker/TestMutatorImpl.java | 99 ++++
.../mutate/worker/TestRecordInspectorImpl.java | 31 ++
.../mutate/worker/TestSequenceValidator.java | 91 ++++
55 files changed, 5135 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index c5decaf..4d341a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,3 +27,4 @@ hcatalog/webhcat/java-client/target
hcatalog/storage-handlers/hbase/target
hcatalog/webhcat/svr/target
conf/hive-default.xml.template
+.DS_Store
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/pom.xml
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/pom.xml b/hcatalog/streaming/pom.xml
index 2135e89..6d03ce1 100644
--- a/hcatalog/streaming/pom.xml
+++ b/hcatalog/streaming/pom.xml
@@ -89,6 +89,12 @@
<optional>true</optional>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <optional>true</optional>
+ <version>3.3.2</version>
+ </dependency>
<!-- test -->
<dependency>
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/HiveConfFactory.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/HiveConfFactory.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/HiveConfFactory.java
new file mode 100644
index 0000000..fcf446c
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/HiveConfFactory.java
@@ -0,0 +1,63 @@
+package org.apache.hive.hcatalog.streaming.mutate;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Creates/configures {@link HiveConf} instances with required ACID attributes. */
+public class HiveConfFactory {
+
+ private static final Logger LOG = LoggerFactory.getLogger(HiveConfFactory.class);
+ private static final String TRANSACTION_MANAGER = "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager";
+
+ public static HiveConf newInstance(Configuration configuration, Class<?> clazz, String metaStoreUri) {
+ HiveConf hiveConf = null;
+ if (configuration != null) {
+ if (!HiveConf.class.isAssignableFrom(configuration.getClass())) {
+ hiveConf = new HiveConf(configuration, clazz);
+ } else {
+ hiveConf = (HiveConf) configuration;
+ }
+ }
+
+ if (hiveConf == null) {
+ hiveConf = HiveConfFactory.newInstance(clazz, metaStoreUri);
+ } else {
+ HiveConfFactory.overrideSettings(hiveConf);
+ }
+ return hiveConf;
+ }
+
+ public static HiveConf newInstance(Class<?> clazz, String metaStoreUri) {
+ HiveConf conf = new HiveConf(clazz);
+ if (metaStoreUri != null) {
+ setHiveConf(conf, HiveConf.ConfVars.METASTOREURIS, metaStoreUri);
+ }
+ overrideSettings(conf);
+ return conf;
+ }
+
+ public static void overrideSettings(HiveConf conf) {
+ setHiveConf(conf, HiveConf.ConfVars.HIVE_TXN_MANAGER, TRANSACTION_MANAGER);
+ setHiveConf(conf, HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
+ setHiveConf(conf, HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, true);
+ // Avoid creating Tez client sessions internally; they currently take much longer to set up
+ setHiveConf(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "mr");
+ }
+
+ private static void setHiveConf(HiveConf conf, HiveConf.ConfVars var, String value) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Overriding HiveConf setting : {} = {}", var, value);
+ }
+ conf.setVar(var, value);
+ }
+
+ private static void setHiveConf(HiveConf conf, HiveConf.ConfVars var, boolean value) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Overriding HiveConf setting : {} = {}", var, value);
+ }
+ conf.setBoolVar(var, value);
+ }
+
+}
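As a usage sketch (the metastore URI is an illustrative assumption and HiveConfFactoryUsage is a
hypothetical caller), a worker without an existing Configuration can obtain an ACID-ready HiveConf
like so; passing an existing Configuration through the three-argument newInstance instead preserves
its settings while still applying the same overrides:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hive.hcatalog.streaming.mutate.HiveConfFactory;

    public class HiveConfFactoryUsage {
      public static void main(String[] args) {
        // Build a HiveConf bound to the (illustrative) metastore URI; the factory
        // forces the DbTxnManager, concurrency support, and the "mr" engine.
        HiveConf conf = HiveConfFactory.newInstance(HiveConfFactoryUsage.class,
            "thrift://metastore.example.com:9083");
        System.out.println(conf.getVar(HiveConf.ConfVars.HIVE_TXN_MANAGER));
      }
    }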
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/UgiMetaStoreClientFactory.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/UgiMetaStoreClientFactory.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/UgiMetaStoreClientFactory.java
new file mode 100644
index 0000000..2a4ddbe
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/UgiMetaStoreClientFactory.java
@@ -0,0 +1,102 @@
+package org.apache.hive.hcatalog.streaming.mutate;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.security.PrivilegedExceptionAction;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.thrift.TException;
+
+import com.google.common.reflect.AbstractInvocationHandler;
+
+/**
+ * Creates a proxied {@link IMetaStoreClient client} that wraps calls in a {@link PrivilegedExceptionAction} if the
+ * {@link UserGroupInformation} is specified. Invokes directly otherwise.
+ */
+public class UgiMetaStoreClientFactory {
+
+ private static final Set<Method> I_META_STORE_CLIENT_METHODS = getIMetaStoreClientMethods();
+
+ private final String metaStoreUri;
+ private final HiveConf conf;
+ private final boolean secureMode;
+ private final UserGroupInformation authenticatedUser;
+ private final String user;
+
+ public UgiMetaStoreClientFactory(String metaStoreUri, HiveConf conf, UserGroupInformation authenticatedUser,
+ String user, boolean secureMode) {
+ this.metaStoreUri = metaStoreUri;
+ this.conf = conf;
+ this.authenticatedUser = authenticatedUser;
+ this.user = user;
+ this.secureMode = secureMode;
+ if (metaStoreUri != null) {
+ conf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreUri);
+ }
+ if (secureMode) {
+ conf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true);
+ }
+ }
+
+ public IMetaStoreClient newInstance() throws MetaException {
+ return newInstance(new HiveMetaStoreClient(conf));
+ }
+
+ public IMetaStoreClient newInstance(IMetaStoreClient delegate) throws MetaException {
+ return createProxy(delegate, user, authenticatedUser);
+ }
+
+ @Override
+ public String toString() {
+ return "UgiMetaStoreClientFactory [metaStoreUri=" + metaStoreUri + ", secureMode=" + secureMode
+ + ", authenticatedUser=" + authenticatedUser + ", user=" + user + "]";
+ }
+
+ private IMetaStoreClient createProxy(final IMetaStoreClient delegate, final String user,
+ final UserGroupInformation authenticatedUser) {
+ InvocationHandler handler = new AbstractInvocationHandler() {
+
+ @Override
+ protected Object handleInvocation(Object proxy, final Method method, final Object[] args) throws Throwable {
+ try {
+ if (!I_META_STORE_CLIENT_METHODS.contains(method) || authenticatedUser == null) {
+ return method.invoke(delegate, args);
+ }
+ try {
+ return authenticatedUser.doAs(new PrivilegedExceptionAction<Object>() {
+ @Override
+ public Object run() throws Exception {
+ return method.invoke(delegate, args);
+ }
+ });
+ } catch (IOException | InterruptedException e) {
+ throw new TException("PrivilegedExceptionAction failed as user '" + user + "'.", e);
+ }
+ } catch (UndeclaredThrowableException | InvocationTargetException e) {
+ throw e.getCause();
+ }
+ }
+ };
+
+ ClassLoader classLoader = IMetaStoreClient.class.getClassLoader();
+ Class<?>[] interfaces = new Class<?>[] { IMetaStoreClient.class };
+ Object proxy = Proxy.newProxyInstance(classLoader, interfaces, handler);
+ return IMetaStoreClient.class.cast(proxy);
+ }
+
+ private static Set<Method> getIMetaStoreClientMethods() {
+ return new HashSet<>(Arrays.asList(IMetaStoreClient.class.getDeclaredMethods()));
+ }
+
+}
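A minimal sketch of how the factory might be used to run all metastore calls under a
UserGroupInformation (the URI and class name are illustrative; secureMode is false here):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hadoop.security.UserGroupInformation;
    import org.apache.hive.hcatalog.streaming.mutate.HiveConfFactory;
    import org.apache.hive.hcatalog.streaming.mutate.UgiMetaStoreClientFactory;

    public class UgiClientUsage {
      public static void main(String[] args) throws Exception {
        String uri = "thrift://metastore.example.com:9083"; // illustrative
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        HiveConf conf = HiveConfFactory.newInstance(UgiClientUsage.class, uri);
        // The returned proxy executes each IMetaStoreClient method inside ugi.doAs(...).
        IMetaStoreClient client = new UgiMetaStoreClientFactory(uri, conf, ugi,
            ugi.getShortUserName(), false).newInstance();
        System.out.println(client.getAllDatabases());
        client.close();
      }
    }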
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/AcidTable.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/AcidTable.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/AcidTable.java
new file mode 100644
index 0000000..20747db
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/AcidTable.java
@@ -0,0 +1,112 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+
+/**
+ * Describes an ACID table that can receive mutation events. Encodes the information that workers need to write
+ * ACID events without having to retrieve it from the meta store database again.
+ */
+public class AcidTable implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ private final String databaseName;
+ private final String tableName;
+ private final boolean createPartitions;
+ private final TableType tableType;
+ private long transactionId;
+
+ private Table table;
+
+ AcidTable(String databaseName, String tableName, boolean createPartitions, TableType tableType) {
+ this.databaseName = databaseName;
+ this.tableName = tableName;
+ this.createPartitions = createPartitions;
+ this.tableType = tableType;
+ }
+
+ /**
+ * Returns {@code 0} until such a time that a {@link Transaction} has been acquired (when
+ * {@link MutatorClient#newTransaction()} exits), at which point this will return the
+ * {@link Transaction#getTransactionId() transaction id}.
+ */
+ public long getTransactionId() {
+ return transactionId;
+ }
+
+ public String getDatabaseName() {
+ return databaseName;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public boolean createPartitions() {
+ return createPartitions;
+ }
+
+ /**
+ * Returns {@code null} until such a time that the table described by the {@link #getDatabaseName() database_name}
+ * {@code .}{@link #getTableName() table_name} has been resolved with the meta store database (when
+ * {@link MutatorClient#connect()} exits), at which point this will then return the corresponding
+ * {@link StorageDescriptor#getOutputFormat() OutputFormat}.
+ */
+ public String getOutputFormatName() {
+ return table != null ? table.getSd().getOutputFormat() : null;
+ }
+
+ /**
+ * Returns {@code 0} until such a time that the table described by the {@link #getDatabaseName() database_name}
+ * {@code .}{@link #getTableName() table_name} has been resolved with the meta store database (when
+ * {@link MutatorClient#connect()} exits), at which point this will then return the corresponding
+ * {@link StorageDescriptor#getNumBuckets() total bucket count}.
+ */
+ public int getTotalBuckets() {
+ return table != null ? table.getSd().getNumBuckets() : 0;
+ }
+
+ public TableType getTableType() {
+ return tableType;
+ }
+
+ public String getQualifiedName() {
+ return (databaseName + "." + tableName).toUpperCase();
+ }
+
+ /**
+ * Returns {@code null} until such a time that the table described by the {@link #getDatabaseName() database_name}
+ * {@code .}{@link #getTableName() table_name} has been resolved with the meta store database (when
+ * {@link MutatorClient#connect()} exits), at which point this will then return the corresponding {@link Table}.
+ * Provided as a convenience to API users who may wish to gather further meta data regarding the table without
+ * connecting with the meta store once more.
+ */
+ public Table getTable() {
+ return table;
+ }
+
+ void setTransactionId(long transactionId) {
+ this.transactionId = transactionId;
+ }
+
+ void setTable(Table table) {
+ if (!databaseName.equalsIgnoreCase(table.getDbName())) {
+ throw new IllegalArgumentException("Incorrect database name.");
+ }
+ if (!tableName.equalsIgnoreCase(table.getTableName())) {
+ throw new IllegalArgumentException("Incorrect table name.");
+ }
+ this.table = table;
+ }
+
+ @Override
+ public String toString() {
+ return "AcidTable [databaseName=" + databaseName + ", tableName=" + tableName + ", createPartitions="
+ + createPartitions + ", tableType=" + tableType + ", outputFormatName=" + getOutputFormatName()
+ + ", totalBuckets=" + getTotalBuckets() + ", transactionId=" + transactionId + "]";
+ }
+
+}
\ No newline at end of file
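The lazily resolved accessors above can be summarised in a short sketch, assuming a MutatorClient
named client built as in the builder example later in this commit:

    client.connect();                           // resolves each table with the meta store
    AcidTable sink = client.getTables().get(0);
    sink.getOutputFormatName();                 // now non-null (the table's OutputFormat)
    sink.getTotalBuckets();                     // now > 0 for a bucketed ACID table
    sink.getTransactionId();                    // still 0: no transaction acquired yet
    Transaction transaction = client.newTransaction();
    sink.getTransactionId();                    // now equal to transaction.getTransactionId()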
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/AcidTableSerializer.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/AcidTableSerializer.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/AcidTableSerializer.java
new file mode 100644
index 0000000..5d8a2bf
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/AcidTableSerializer.java
@@ -0,0 +1,100 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.thrift.TDeserializer;
+import org.apache.thrift.TException;
+import org.apache.thrift.TSerializer;
+import org.apache.thrift.protocol.TCompactProtocol;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility to serialize/deserialize {@link AcidTable AcidTables} into strings so that they can be easily transported as
+ * {@link Configuration} properties.
+ */
+public class AcidTableSerializer {
+
+ private static final Logger LOG = LoggerFactory.getLogger(AcidTableSerializer.class);
+
+ /* Version prefix; allows improved encoding schemes to be introduced later. */
+ private static final String PROLOG_V1 = "AcidTableV1:";
+
+ /** Returns a base 64 encoded representation of the supplied {@link AcidTable}. */
+ public static String encode(AcidTable table) throws IOException {
+ ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+ try (DataOutputStream data = new DataOutputStream(bytes)) {
+ data.writeUTF(table.getDatabaseName());
+ data.writeUTF(table.getTableName());
+ data.writeBoolean(table.createPartitions());
+ if (table.getTransactionId() <= 0) {
+ LOG.warn("Transaction ID <= 0. The recipient is probably expecting a transaction ID.");
+ }
+ data.writeLong(table.getTransactionId());
+ data.writeByte(table.getTableType().getId());
+
+ Table metaTable = table.getTable();
+ if (metaTable != null) {
+ byte[] thrift = new TSerializer(new TCompactProtocol.Factory()).serialize(metaTable);
+ data.writeInt(thrift.length);
+ data.write(thrift);
+ } else {
+ LOG.warn("Meta store table is null. The recipient is probably expecting an instance.");
+ data.writeInt(0);
+ }
+ } catch (TException e) {
+ throw new IOException("Error serializing meta store table.", e);
+ }
+
+ return PROLOG_V1 + new String(Base64.encodeBase64(bytes.toByteArray()), Charset.forName("UTF-8"));
+ }
+
+ /** Returns the {@link AcidTable} instance decoded from a base 64 representation. */
+ public static AcidTable decode(String encoded) throws IOException {
+ if (!encoded.startsWith(PROLOG_V1)) {
+ throw new IllegalStateException("Unsupported version.");
+ }
+ encoded = encoded.substring(PROLOG_V1.length());
+
+ byte[] decoded = Base64.decodeBase64(encoded);
+ AcidTable table = null;
+ try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(decoded))) {
+ String databaseName = in.readUTF();
+ String tableName = in.readUTF();
+ boolean createPartitions = in.readBoolean();
+ long transactionId = in.readLong();
+ TableType tableType = TableType.valueOf(in.readByte());
+ int thriftLength = in.readInt();
+
+ table = new AcidTable(databaseName, tableName, createPartitions, tableType);
+ table.setTransactionId(transactionId);
+
+ Table metaTable = null;
+ if (thriftLength > 0) {
+ metaTable = new Table();
+ try {
+ byte[] thriftEncoded = new byte[thriftLength];
+ in.readFully(thriftEncoded, 0, thriftLength);
+ new TDeserializer(new TCompactProtocol.Factory()).deserialize(metaTable, thriftEncoded);
+ table.setTable(metaTable);
+ } catch (TException e) {
+ throw new IOException("Error deserializing meta store table.", e);
+ }
+ }
+ }
+ return table;
+ }
+
+}
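A round-trip sketch of shipping an AcidTable to workers via a Configuration; the property name
"mutation.acid.table" is illustrative, not part of the API:

    // Client side: encode the connected AcidTable into a job property.
    Configuration jobConf = new Configuration();
    jobConf.set("mutation.acid.table", AcidTableSerializer.encode(sinkTable));

    // Worker side: rebuild the AcidTable without another meta store round trip.
    AcidTable decoded = AcidTableSerializer.decode(jobConf.get("mutation.acid.table"));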
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/ClientException.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/ClientException.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/ClientException.java
new file mode 100644
index 0000000..988dc38
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/ClientException.java
@@ -0,0 +1,15 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+public class ClientException extends Exception {
+
+ private static final long serialVersionUID = 1L;
+
+ ClientException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ ClientException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/ConnectionException.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/ConnectionException.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/ConnectionException.java
new file mode 100644
index 0000000..b54455a
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/ConnectionException.java
@@ -0,0 +1,15 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+public class ConnectionException extends ClientException {
+
+ private static final long serialVersionUID = 1L;
+
+ ConnectionException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ ConnectionException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/MutatorClient.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/MutatorClient.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/MutatorClient.java
new file mode 100644
index 0000000..2724525
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/MutatorClient.java
@@ -0,0 +1,140 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.Lock;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.LockFailureListener;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Responsible for orchestrating {@link Transaction Transactions} within which ACID table mutation events can occur.
+ * Typically this will be a large batch of delta operations.
+ */
+public class MutatorClient implements Closeable {
+
+ private static final Logger LOG = LoggerFactory.getLogger(MutatorClient.class);
+ private static final String TRANSACTIONAL_PARAM_KEY = "transactional";
+
+ private final IMetaStoreClient metaStoreClient;
+ private final Lock.Options lockOptions;
+ private final List<AcidTable> tables;
+ private boolean connected;
+
+ MutatorClient(IMetaStoreClient metaStoreClient, HiveConf configuration, LockFailureListener lockFailureListener,
+ String user, Collection<AcidTable> tables) {
+ this.metaStoreClient = metaStoreClient;
+ this.tables = Collections.unmodifiableList(new ArrayList<>(tables));
+
+ lockOptions = new Lock.Options()
+ .configuration(configuration)
+ .lockFailureListener(lockFailureListener == null ? LockFailureListener.NULL_LISTENER : lockFailureListener)
+ .user(user);
+ for (AcidTable table : tables) {
+ lockOptions.addTable(table.getDatabaseName(), table.getTableName());
+ }
+ }
+
+ /**
+ * Connects to the {@link IMetaStoreClient meta store} that will be used to manage {@link Transaction} life-cycles.
+ * Also checks that the tables destined to receive mutation events are able to do so. The client should only hold one
+ * open transaction at any given time (TODO: enforce this).
+ */
+ public void connect() throws ConnectionException {
+ if (connected) {
+ throw new ConnectionException("Already connected.");
+ }
+ for (AcidTable table : tables) {
+ checkTable(metaStoreClient, table);
+ }
+ LOG.debug("Connected to end point {}", metaStoreClient);
+ connected = true;
+ }
+
+ /** Creates a new {@link Transaction} by opening a transaction with the {@link IMetaStoreClient meta store}. */
+ public Transaction newTransaction() throws TransactionException {
+ if (!connected) {
+ throw new TransactionException("Not connected - cannot create transaction.");
+ }
+ Transaction transaction = new Transaction(metaStoreClient, lockOptions);
+ for (AcidTable table : tables) {
+ table.setTransactionId(transaction.getTransactionId());
+ }
+ LOG.debug("Created transaction {}", transaction);
+ return transaction;
+ }
+
+ /** Returns whether the client connected successfully. Note that the client may have since become disconnected. */
+ public boolean isConnected() {
+ return connected;
+ }
+
+ /**
+ * Closes the client releasing any {@link IMetaStoreClient meta store} connections held. Does not notify any open
+ * transactions (TODO: perhaps it should?)
+ */
+ @Override
+ public void close() throws IOException {
+ metaStoreClient.close();
+ LOG.debug("Closed client.");
+ connected = false;
+ }
+
+ /**
+ * Returns the list of managed {@link AcidTable AcidTables} that can receive mutation events under the control of this
+ * client.
+ */
+ public List<AcidTable> getTables() throws ConnectionException {
+ if (!connected) {
+ throw new ConnectionException("Not connected - cannot interrogate tables.");
+ }
+ return Collections.<AcidTable> unmodifiableList(tables);
+ }
+
+ @Override
+ public String toString() {
+ return "MutatorClient [metaStoreClient=" + metaStoreClient + ", connected=" + connected + "]";
+ }
+
+ private void checkTable(IMetaStoreClient metaStoreClient, AcidTable acidTable) throws ConnectionException {
+ try {
+ LOG.debug("Checking table {}.", acidTable.getQualifiedName());
+ Table metaStoreTable = metaStoreClient.getTable(acidTable.getDatabaseName(), acidTable.getTableName());
+
+ if (acidTable.getTableType() == TableType.SINK) {
+ Map<String, String> parameters = metaStoreTable.getParameters();
+ if (!Boolean.parseBoolean(parameters.get(TRANSACTIONAL_PARAM_KEY))) {
+ throw new ConnectionException("Cannot stream to table that is not transactional: '"
+ + acidTable.getQualifiedName() + "'.");
+ }
+ int totalBuckets = metaStoreTable.getSd().getNumBuckets();
+ LOG.debug("Table {} has {} buckets.", acidTable.getQualifiedName(), totalBuckets);
+ if (totalBuckets <= 0) {
+ throw new ConnectionException("Cannot stream to table that has not been bucketed: '"
+ + acidTable.getQualifiedName() + "'.");
+ }
+
+ String outputFormat = metaStoreTable.getSd().getOutputFormat();
+ LOG.debug("Table {} has {} OutputFormat.", acidTable.getQualifiedName(), outputFormat);
+ acidTable.setTable(metaStoreTable);
+ }
+ } catch (NoSuchObjectException e) {
+ throw new ConnectionException("Invalid table '" + acidTable.getQualifiedName() + "'", e);
+ } catch (TException e) {
+ throw new ConnectionException("Error communicating with the meta store", e);
+ }
+ LOG.debug("Table {} OK.", acidTable.getQualifiedName());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/MutatorClientBuilder.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/MutatorClientBuilder.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/MutatorClientBuilder.java
new file mode 100644
index 0000000..6c21c59
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/MutatorClientBuilder.java
@@ -0,0 +1,115 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.streaming.mutate.HiveConfFactory;
+import org.apache.hive.hcatalog.streaming.mutate.UgiMetaStoreClientFactory;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.Lock;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.LockFailureListener;
+
+/** Convenience class for building {@link MutatorClient} instances. */
+public class MutatorClientBuilder {
+
+ private final Map<String, AcidTable> tables = new HashMap<>();
+ private HiveConf configuration;
+ private UserGroupInformation authenticatedUser;
+ private String metaStoreUri;
+ private LockFailureListener lockFailureListener;
+
+ public MutatorClientBuilder configuration(HiveConf conf) {
+ this.configuration = conf;
+ return this;
+ }
+
+ public MutatorClientBuilder authenticatedUser(UserGroupInformation authenticatedUser) {
+ this.authenticatedUser = authenticatedUser;
+ return this;
+ }
+
+ public MutatorClientBuilder metaStoreUri(String metaStoreUri) {
+ this.metaStoreUri = metaStoreUri;
+ return this;
+ }
+
+ /** Set a listener to handle {@link Lock} failure events - highly recommended. */
+ public MutatorClientBuilder lockFailureListener(LockFailureListener lockFailureListener) {
+ this.lockFailureListener = lockFailureListener;
+ return this;
+ }
+
+ /**
+ * Adds a mutation event source (an ACID table) to be managed by this client. Partitions are never created
+ * automatically for source tables.
+ */
+ public MutatorClientBuilder addSourceTable(String databaseName, String tableName) {
+ addTable(databaseName, tableName, false, TableType.SOURCE);
+ return this;
+ }
+
+ /**
+ * Adds a mutation event destination (an ACID table) to be managed by this client, which is either unpartitioned
+ * or is not to have partitions created automatically.
+ */
+ public MutatorClientBuilder addSinkTable(String databaseName, String tableName) {
+ return addSinkTable(databaseName, tableName, false);
+ }
+
+ /**
+ * Adds a partitioned mutation event destination (an ACID table) to be managed by this client, where new partitions
+ * will be created as needed.
+ */
+ public MutatorClientBuilder addSinkTable(String databaseName, String tableName, boolean createPartitions) {
+ addTable(databaseName, tableName, createPartitions, TableType.SINK);
+ return this;
+ }
+
+ private void addTable(String databaseName, String tableName, boolean createPartitions, TableType tableType) {
+ if (databaseName == null) {
+ throw new IllegalArgumentException("Database cannot be null");
+ }
+ if (tableName == null) {
+ throw new IllegalArgumentException("Table cannot be null");
+ }
+ String key = (databaseName + "." + tableName).toUpperCase();
+ AcidTable previous = tables.get(key);
+ if (previous != null) {
+ if (tableType == TableType.SINK && previous.getTableType() != TableType.SINK) {
+ tables.remove(key);
+ } else {
+ throw new IllegalArgumentException("Table has already been added: " + databaseName + "." + tableName);
+ }
+ }
+
+ tables.put(key, new AcidTable(databaseName, tableName, createPartitions, tableType));
+ }
+
+ /** Builds the client. */
+ public MutatorClient build() throws ClientException, MetaException {
+ String user = authenticatedUser == null ? System.getProperty("user.name") : authenticatedUser.getShortUserName();
+ boolean secureMode = authenticatedUser == null ? false : authenticatedUser.hasKerberosCredentials();
+
+ configuration = HiveConfFactory.newInstance(configuration, this.getClass(), metaStoreUri);
+
+ IMetaStoreClient metaStoreClient;
+ try {
+ metaStoreClient = new UgiMetaStoreClientFactory(metaStoreUri, configuration, authenticatedUser, user, secureMode)
+ .newInstance(HCatUtil.getHiveMetastoreClient(configuration));
+ } catch (IOException e) {
+ throw new ClientException("Could not create meta store client.", e);
+ }
+
+ return new MutatorClient(metaStoreClient, configuration, lockFailureListener, user, tables.values());
+ }
+
+}
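Putting the pieces together, one plausible client-side sequence (the database, table, URI, and
listener are illustrative assumptions):

    MutatorClient client = new MutatorClientBuilder()
        .metaStoreUri("thrift://metastore.example.com:9083")
        .lockFailureListener(myListener)                   // assumed to exist
        .addSinkTable("example_db", "example_table", true) // create partitions as needed
        .build();
    client.connect();          // verifies the table is transactional and bucketed
    Transaction transaction = client.newTransaction();
    // ... distribute the encoded AcidTables to workers, mutate, then commit ...
    client.close();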
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/TableType.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/TableType.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/TableType.java
new file mode 100644
index 0000000..aa6d239
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/TableType.java
@@ -0,0 +1,37 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+public enum TableType {
+ SOURCE((byte) 0),
+ SINK((byte) 1);
+
+ private static final TableType[] INDEX = buildIndex();
+
+ private static TableType[] buildIndex() {
+ TableType[] index = new TableType[TableType.values().length];
+ for (TableType type : values()) {
+ byte position = type.getId();
+ if (index[position] != null) {
+ throw new IllegalStateException("Overloaded index: " + position);
+ }
+ index[position] = type;
+ }
+ return index;
+ }
+
+ private final byte id;
+
+ private TableType(byte id) {
+ this.id = id;
+ }
+
+ public byte getId() {
+ return id;
+ }
+
+ public static TableType valueOf(byte id) {
+ if (id < 0 || id >= INDEX.length) {
+ throw new IllegalArgumentException("Invalid id: " + id);
+ }
+ return INDEX[id];
+ }
+}
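The byte ids exist so that AcidTableSerializer can write the table type compactly; the round trip
is simply:

    byte id = TableType.SINK.getId();           // (byte) 1, as written by the serializer
    TableType restored = TableType.valueOf(id); // TableType.SINK, as read back
    // TableType.valueOf((byte) 2) would throw IllegalArgumentException.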
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/Transaction.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/Transaction.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/Transaction.java
new file mode 100644
index 0000000..6532900
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/Transaction.java
@@ -0,0 +1,114 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
+import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
+import org.apache.hive.hcatalog.streaming.TransactionBatch.TxnState;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.Lock;
+import org.apache.hive.hcatalog.streaming.mutate.client.lock.LockException;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class Transaction {
+
+ private static final Logger LOG = LoggerFactory.getLogger(Transaction.class);
+
+ private final Lock lock;
+ private final IMetaStoreClient metaStoreClient;
+ private final long transactionId;
+
+ private TxnState state;
+
+ Transaction(IMetaStoreClient metaStoreClient, Lock.Options lockOptions) throws TransactionException {
+ this(metaStoreClient, new Lock(metaStoreClient, lockOptions));
+ }
+
+ /** Visible for testing only. */
+ Transaction(IMetaStoreClient metaStoreClient, Lock lock) throws TransactionException {
+ this.metaStoreClient = metaStoreClient;
+ this.lock = lock;
+ transactionId = open(lock.getUser());
+ }
+
+ public long getTransactionId() {
+ return transactionId;
+ }
+
+ public TxnState getState() {
+ return state;
+ }
+
+ /**
+ * Begin the transaction. Acquires a {@link Lock} for the transaction and {@link AcidTable AcidTables}.
+ */
+ public void begin() throws TransactionException {
+ try {
+ lock.acquire(transactionId);
+ } catch (LockException e) {
+ throw new TransactionException("Unable to acquire lock for transaction: " + transactionId, e);
+ }
+ state = TxnState.OPEN;
+ LOG.debug("Begin. Transaction id: {}", transactionId);
+ }
+
+ /** Commits the transaction. Releases the {@link Lock}. */
+ public void commit() throws TransactionException {
+ try {
+ lock.release();
+ } catch (LockException e) {
+ // This appears to leave the remote transaction in an inconsistent state, but the heartbeat is now
+ // cancelled and it will eventually time out.
+ throw new TransactionException("Unable to release lock: " + lock + " for transaction: " + transactionId, e);
+ }
+ try {
+ metaStoreClient.commitTxn(transactionId);
+ state = TxnState.COMMITTED;
+ } catch (NoSuchTxnException e) {
+ throw new TransactionException("Invalid transaction id: " + transactionId, e);
+ } catch (TxnAbortedException e) {
+ throw new TransactionException("Aborted transaction cannot be committed: " + transactionId, e);
+ } catch (TException e) {
+ throw new TransactionException("Unable to commit transaction: " + transactionId, e);
+ }
+ LOG.debug("Committed. Transaction id: {}", transactionId);
+ }
+
+ /** Aborts the transaction. Releases the {@link Lock}. */
+ public void abort() throws TransactionException {
+ try {
+ lock.release();
+ } catch (LockException e) {
+ // This appears to leave the remote transaction in an inconsistent state, but the heartbeat is now
+ // cancelled and it will eventually time out
+ throw new TransactionException("Unable to release lock: " + lock + " for transaction: " + transactionId, e);
+ }
+ try {
+ metaStoreClient.rollbackTxn(transactionId);
+ state = TxnState.ABORTED;
+ } catch (NoSuchTxnException e) {
+ throw new TransactionException("Unable to abort invalid transaction id : " + transactionId, e);
+ } catch (TException e) {
+ throw new TransactionException("Unable to abort transaction id : " + transactionId, e);
+ }
+ LOG.debug("Aborted. Transaction id: {}", transactionId);
+ }
+
+ @Override
+ public String toString() {
+ return "Transaction [transactionId=" + transactionId + ", state=" + state + "]";
+ }
+
+ private long open(String user) throws TransactionException {
+ long transactionId = -1;
+ try {
+ transactionId = metaStoreClient.openTxn(user);
+ state = TxnState.INACTIVE;
+ } catch (TException e) {
+ throw new TransactionException("Unable to open transaction for user: " + user, e);
+ }
+ LOG.debug("Opened transaction with id: {}", transactionId);
+ return transactionId;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/TransactionException.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/TransactionException.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/TransactionException.java
new file mode 100644
index 0000000..48fb1cf
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/TransactionException.java
@@ -0,0 +1,15 @@
+package org.apache.hive.hcatalog.streaming.mutate.client;
+
+public class TransactionException extends ClientException {
+
+ private static final long serialVersionUID = 1L;
+
+ TransactionException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ TransactionException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/HeartbeatFactory.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/HeartbeatFactory.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/HeartbeatFactory.java
new file mode 100644
index 0000000..5814d37
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/HeartbeatFactory.java
@@ -0,0 +1,30 @@
+package org.apache.hive.hcatalog.streaming.mutate.client.lock;
+
+import java.util.Collection;
+import java.util.Timer;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Creates a default {@link HeartbeatTimerTask} for {@link Lock Locks}. */
+class HeartbeatFactory {
+
+ private static final Logger LOG = LoggerFactory.getLogger(HeartbeatFactory.class);
+
+ /** Creates a new {@link HeartbeatTimerTask} instance for the {@link Lock} and schedules it. */
+ Timer newInstance(IMetaStoreClient metaStoreClient, LockFailureListener listener, Long transactionId,
+ Collection<Table> tableDescriptors, long lockId, int heartbeatPeriod) {
+ Timer heartbeatTimer = new Timer("hive-lock-heartbeat[lockId=" + lockId + ", transactionId=" + transactionId + "]",
+ true);
+ HeartbeatTimerTask task = new HeartbeatTimerTask(metaStoreClient, listener, transactionId, tableDescriptors, lockId);
+ heartbeatTimer.schedule(task, TimeUnit.SECONDS.toMillis(heartbeatPeriod),
+ TimeUnit.SECONDS.toMillis(heartbeatPeriod));
+
+ LOG.debug("Scheduled heartbeat timer task: {}", heartbeatTimer);
+ return heartbeatTimer;
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/HeartbeatTimerTask.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/HeartbeatTimerTask.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/HeartbeatTimerTask.java
new file mode 100644
index 0000000..2446c10
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/HeartbeatTimerTask.java
@@ -0,0 +1,66 @@
+package org.apache.hive.hcatalog.streaming.mutate.client.lock;
+
+import java.util.Collection;
+import java.util.TimerTask;
+
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
+import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
+import org.apache.hive.hcatalog.streaming.mutate.client.Transaction;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link TimerTask} that sends {@link IMetaStoreClient#heartbeat(long, long) heartbeat} events to the
+ * {@link IMetaStoreClient meta store} to keep the {@link Lock} and {@link Transaction} alive. Notifies the registered
+ * {@link LockFailureListener} should the lock fail.
+ */
+class HeartbeatTimerTask extends TimerTask {
+
+ private static final Logger LOG = LoggerFactory.getLogger(HeartbeatTimerTask.class);
+
+ private final IMetaStoreClient metaStoreClient;
+ private final long lockId;
+ private final Long transactionId;
+ private final LockFailureListener listener;
+ private final Collection<Table> tableDescriptors;
+
+ HeartbeatTimerTask(IMetaStoreClient metaStoreClient, LockFailureListener listener, Long transactionId,
+ Collection<Table> tableDescriptors, long lockId) {
+ this.metaStoreClient = metaStoreClient;
+ this.listener = listener;
+ this.transactionId = transactionId;
+ this.tableDescriptors = tableDescriptors;
+ this.lockId = lockId;
+ LOG.debug("Reporting to listener {}", listener);
+ }
+
+ @Override
+ public void run() {
+ try {
+ // I'm assuming that there is no transaction ID for a read lock.
+ metaStoreClient.heartbeat(transactionId == null ? 0 : transactionId, lockId);
+ LOG.debug("Sent heartbeat for lock={}, transactionId={}", lockId, transactionId);
+ } catch (NoSuchLockException | NoSuchTxnException | TxnAbortedException e) {
+ failLock(e);
+ } catch (TException e) {
+ LOG.warn("Failed to send heartbeat to meta store.", e);
+ }
+ }
+
+ private void failLock(Exception e) {
+ LOG.debug("Lock " + lockId + " failed, cancelling heartbeat and notifiying listener: " + listener, e);
+ // Cancel the heartbeat
+ cancel();
+ listener.lockFailed(lockId, transactionId, Lock.asStrings(tableDescriptors), e);
+ }
+
+ @Override
+ public String toString() {
+ return "HeartbeatTimerTask [lockId=" + lockId + ", transactionId=" + transactionId + "]";
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java
new file mode 100644
index 0000000..21604df
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java
@@ -0,0 +1,282 @@
+package org.apache.hive.hcatalog.streaming.mutate.client.lock;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.Timer;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.LockComponentBuilder;
+import org.apache.hadoop.hive.metastore.LockRequestBuilder;
+import org.apache.hadoop.hive.metastore.api.LockComponent;
+import org.apache.hadoop.hive.metastore.api.LockRequest;
+import org.apache.hadoop.hive.metastore.api.LockResponse;
+import org.apache.hadoop.hive.metastore.api.LockState;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Manages the state required to safely read/write from/to an ACID table.
+ */
+public class Lock {
+
+ private static final Logger LOG = LoggerFactory.getLogger(Lock.class);
+
+ private static final double HEARTBEAT_FACTOR = 0.75;
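+ /** Fallback heartbeat period in seconds, used when no {@link HiveConf} is supplied. */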
+ private static final int DEFAULT_HEARTBEAT_PERIOD = 275;
+
+ private final IMetaStoreClient metaStoreClient;
+ private final HeartbeatFactory heartbeatFactory;
+ private final LockFailureListener listener;
+ private final Collection<Table> tableDescriptors;
+ private final int lockRetries;
+ private final int retryWaitSeconds;
+ private final String user;
+ private final HiveConf hiveConf;
+
+ private Timer heartbeat;
+ private Long lockId;
+ private Long transactionId;
+
+ public Lock(IMetaStoreClient metaStoreClient, Options options) {
+ this(metaStoreClient, new HeartbeatFactory(), options.hiveConf, options.listener, options.user,
+ options.descriptors, options.lockRetries, options.retryWaitSeconds);
+ }
+
+ /** Visible for testing only. */
+ Lock(IMetaStoreClient metaStoreClient, HeartbeatFactory heartbeatFactory, HiveConf hiveConf,
+ LockFailureListener listener, String user, Collection<Table> tableDescriptors, int lockRetries,
+ int retryWaitSeconds) {
+ this.metaStoreClient = metaStoreClient;
+ this.heartbeatFactory = heartbeatFactory;
+ this.hiveConf = hiveConf;
+ this.user = user;
+ this.tableDescriptors = tableDescriptors;
+ this.listener = listener;
+ this.lockRetries = lockRetries;
+ this.retryWaitSeconds = retryWaitSeconds;
+
+ if (LockFailureListener.NULL_LISTENER.equals(listener)) {
+ LOG.warn("No {} supplied. Data quality and availability cannot be assured.",
+ LockFailureListener.class.getSimpleName());
+ }
+ }
+
+ /** Attempts to acquire a read lock on the table, returns if successful, throws exception otherwise. */
+ public void acquire() throws LockException {
+ lockId = internalAcquire(null);
+ initiateHeartbeat();
+ }
+
+ /** Attempts to acquire a read lock on the table for the given transaction, returns if successful, throws exception otherwise. */
+ public void acquire(long transactionId) throws LockException {
+ lockId = internalAcquire(transactionId);
+ this.transactionId = transactionId;
+ initiateHeartbeat();
+ }
+
+ /** Attempts to release the read lock on the table. Throws an exception if the lock failed at any point. */
+ public void release() throws LockException {
+ if (heartbeat != null) {
+ heartbeat.cancel();
+ }
+ internalRelease();
+ }
+
+ public String getUser() {
+ return user;
+ }
+
+ @Override
+ public String toString() {
+ return "Lock [metaStoreClient=" + metaStoreClient + ", lockId=" + lockId + ", transactionId=" + transactionId
+ + "]";
+ }
+
+ private long internalAcquire(Long transactionId) throws LockException {
+ int attempts = 0;
+ LockRequest request = buildSharedLockRequest(transactionId);
+ do {
+ LockResponse response = null;
+ try {
+ response = metaStoreClient.lock(request);
+ } catch (TException e) {
+ throw new LockException("Unable to acquire lock for tables: [" + join(tableDescriptors) + "]", e);
+ }
+ if (response != null) {
+ LockState state = response.getState();
+ if (state == LockState.NOT_ACQUIRED || state == LockState.ABORT) {
+ // I expect we'll only see NOT_ACQUIRED here?
+ break;
+ }
+ if (state == LockState.ACQUIRED) {
+ LOG.debug("Acquired lock {}", response.getLockid());
+ return response.getLockid();
+ }
+ if (state == LockState.WAITING) {
+ try {
+ Thread.sleep(TimeUnit.SECONDS.toMillis(retryWaitSeconds));
+ } catch (InterruptedException e) {
+ // Restore the interrupt status so that callers can observe the interruption.
+ Thread.currentThread().interrupt();
+ }
+ }
+ }
+ attempts++;
+ } while (attempts < lockRetries);
+ throw new LockException("Could not acquire lock on tables: [" + join(tableDescriptors) + "]");
+ }
+
+ private void internalRelease() {
+ try {
+ // if there is a transaction then this lock will be released on commit/abort/rollback instead.
+ if (lockId != null && transactionId == null) {
+ metaStoreClient.unlock(lockId);
+ LOG.debug("Released lock {}", lockId);
+ lockId = null;
+ }
+ } catch (TException e) {
+ LOG.error("Lock " + lockId + " failed.", e);
+ listener.lockFailed(lockId, transactionId, asStrings(tableDescriptors), e);
+ }
+ }
+
+ private LockRequest buildSharedLockRequest(Long transactionId) {
+ LockRequestBuilder requestBuilder = new LockRequestBuilder();
+ for (Table descriptor : tableDescriptors) {
+ LockComponent component = new LockComponentBuilder()
+ .setDbName(descriptor.getDbName())
+ .setTableName(descriptor.getTableName())
+ .setShared()
+ .build();
+ requestBuilder.addLockComponent(component);
+ }
+ if (transactionId != null) {
+ requestBuilder.setTransactionId(transactionId);
+ }
+ LockRequest request = requestBuilder.setUser(user).build();
+ return request;
+ }
+
+ private void initiateHeartbeat() {
+ int heartbeatPeriod = getHeartbeatPeriod();
+ LOG.debug("Heartbeat period {}s", heartbeatPeriod);
+ heartbeat = heartbeatFactory.newInstance(metaStoreClient, listener, transactionId, tableDescriptors, lockId,
+ heartbeatPeriod);
+ }
+
+ private int getHeartbeatPeriod() {
+ int heartbeatPeriod = DEFAULT_HEARTBEAT_PERIOD;
+ if (hiveConf != null) {
+ // This value is always in seconds and includes an 's' suffix.
+ String txTimeoutSeconds = hiveConf.getVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT);
+ if (txTimeoutSeconds != null) {
+ // We want to send the heartbeat at an interval that is less than the timeout.
+ heartbeatPeriod = Math.max(1,
+ (int) (Integer.parseInt(txTimeoutSeconds.substring(0, txTimeoutSeconds.length() - 1)) * HEARTBEAT_FACTOR));
+ }
+ }
+ return heartbeatPeriod;
+ }
+
+ /** Visible for testing only. */
+ Long getLockId() {
+ return lockId;
+ }
+
+ /** Visible for testing only. */
+ Long getTransactionId() {
+ return transactionId;
+ }
+
+ /** Visible for testing only. */
+ static String join(Iterable<? extends Object> values) {
+ return StringUtils.join(values, ",");
+ }
+
+ /** Visible for testing only. */
+ static List<String> asStrings(Collection<Table> tables) {
+ List<String> strings = new ArrayList<>(tables.size());
+ for (Table descriptor : tables) {
+ strings.add(descriptor.getDbName() + "." + descriptor.getTableName());
+ }
+ return strings;
+ }
+
+ /** Constructs lock options for a set of Hive ACID tables from which we wish to read. */
+ public static final class Options {
+ Set<Table> descriptors = new LinkedHashSet<>();
+ LockFailureListener listener = LockFailureListener.NULL_LISTENER;
+ int lockRetries = 5;
+ int retryWaitSeconds = 30;
+ String user;
+ HiveConf hiveConf;
+
+ /** Adds a table for which a shared read lock will be requested. */
+ public Options addTable(String databaseName, String tableName) {
+ checkNotNullOrEmpty(databaseName);
+ checkNotNullOrEmpty(tableName);
+ Table table = new Table();
+ table.setDbName(databaseName);
+ table.setTableName(tableName);
+ descriptors.add(table);
+ return this;
+ }
+
+ public Options user(String user) {
+ checkNotNullOrEmpty(user);
+ this.user = user;
+ return this;
+ }
+
+ public Options configuration(HiveConf hiveConf) {
+ checkNotNull(hiveConf);
+ this.hiveConf = hiveConf;
+ return this;
+ }
+
+ /** Sets a listener to handle failures of locks that were previously acquired. */
+ public Options lockFailureListener(LockFailureListener listener) {
+ checkNotNull(listener);
+ this.listener = listener;
+ return this;
+ }
+
+ public Options lockRetries(int lockRetries) {
+ checkArgument(lockRetries > 0);
+ this.lockRetries = lockRetries;
+ return this;
+ }
+
+ public Options retryWaitSeconds(int retryWaitSeconds) {
+ checkArgument(retryWaitSeconds > 0);
+ this.retryWaitSeconds = retryWaitSeconds;
+ return this;
+ }
+
+ private static void checkArgument(boolean value) {
+ if (!value) {
+ throw new IllegalArgumentException();
+ }
+ }
+
+ private static void checkNotNull(Object value) {
+ if (value == null) {
+ throw new IllegalArgumentException();
+ }
+ }
+
+ private static void checkNotNullOrEmpty(String value) {
+ if (StringUtils.isBlank(value)) {
+ throw new IllegalArgumentException();
+ }
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/LockException.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/LockException.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/LockException.java
new file mode 100644
index 0000000..67ed601
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/LockException.java
@@ -0,0 +1,15 @@
+package org.apache.hive.hcatalog.streaming.mutate.client.lock;
+
+public class LockException extends Exception {
+
+ private static final long serialVersionUID = 1L;
+
+ public LockException(String message) {
+ super(message);
+ }
+
+ public LockException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/LockFailureListener.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/LockFailureListener.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/LockFailureListener.java
new file mode 100644
index 0000000..2b6a12a
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/LockFailureListener.java
@@ -0,0 +1,26 @@
+package org.apache.hive.hcatalog.streaming.mutate.client.lock;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Provides a means to handle the situation when a held lock fails. */
+public interface LockFailureListener {
+
+ static final Logger LOG = LoggerFactory.getLogger(LockFailureListener.class);
+
+ static final LockFailureListener NULL_LISTENER = new LockFailureListener() {
+ @Override
+ public void lockFailed(long lockId, Long transactionId, Iterable<String> tableNames, Throwable t) {
+ LOG.warn(
+ "Ignored lock failure: lockId=" + lockId + ", transactionId=" + transactionId + ", tables=" + tableNames, t);
+ }
+
+ public String toString() {
+ return LockFailureListener.class.getName() + ".NULL_LISTENER";
+ }
+ };
+
+ /** Called when the specified lock has failed. You should probably abort your job in this case. */
+ void lockFailed(long lockId, Long transactionId, Iterable<String> tableNames, Throwable t);
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/doc-files/system-overview.dot
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/doc-files/system-overview.dot b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/doc-files/system-overview.dot
new file mode 100644
index 0000000..79c30e7
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/doc-files/system-overview.dot
@@ -0,0 +1,27 @@
+digraph "API Usage" {
+ nodesep=1.2;
+
+ DATA [label="ACID\ndataset",shape=oval,style=filled,color="gray"];
+ CHANGES [label="Changed\ndata",shape=oval,style=filled,color="gray"];
+
+ META_STORE [label="Hive\nMetaStore",shape=box,style=filled,color="darkseagreen3"];
+ HIVE_CLI [label="Hive\nCLI",shape=box,style=filled,color="darkseagreen3"];
+
+ MERGE1 [label="Compute\nmutations\n(your code)",shape=box,style=filled,color="khaki1"];
+ SORT [label="Group\n& sort\n(your code)",shape=box,style=filled,color="khaki1"];
+ CLIENT [label="Mutator\nclient",shape=box,style=filled,color="lightblue"];
+ BUCKET [label="Bucket ID\nappender",shape=box,style=filled,color="lightblue"];
+ COORD [label="Mutator\ncoordinator",shape=box,style=filled,color="lightblue"];
+ CLIENT -> COORD [label="Provides\nconf to"];
+ CLIENT -> BUCKET [label="Provides\nconf to"];
+
+ CLIENT -> META_STORE [label="Manages\ntxns using"];
+ CHANGES -> MERGE1 [label="Reads ∆s\nfrom"];
+ DATA -> MERGE1 [label="Reads\nROW__IDs\nfrom"];
+ BUCKET -> MERGE1 [label="Appends ids\nto inserts"];
+ MERGE1 -> SORT;
+ SORT -> COORD [label="Issues\nmutations to"];
+ COORD -> DATA [label="Writes to"];
+ DATA -> HIVE_CLI [label="Read by"];
+ META_STORE -> DATA [label="Compacts"];
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/package.html
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/package.html b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/package.html
new file mode 100644
index 0000000..9fc10b6
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/package.html
@@ -0,0 +1,495 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+
+<html lang="en">
+
+<head>
+<meta name=Title content="HCatalog Streaming Mutation API">
+<meta name=Keywords content="HCatalog Streaming Mutation ACID">
+<meta http-equiv=Content-Type content="text/html; charset=utf-8">
+<title>HCatalog Streaming Mutation API</title>
+</head>
+
+<body>
+
+<h1>HCatalog Streaming Mutation API -- high level description</h1>
+
+<h2>Background</h2>
+<p>
+In certain data processing use cases it is necessary to modify existing
+data when new facts arrive. An example of this is the classic ETL merge
+where a copy of a data set is kept in sync with a master by the frequent
+application of deltas. The deltas describe the mutations (inserts,
+updates, deletes) that have occurred to the master since the previous
+sync. To implement such a case using Hadoop traditionally demands that
+the partitions containing records targeted by the mutations be
+rewritten. This is a coarse approach; a partition containing millions of
+records might be rebuilt because of a single record change. Additionally
+these partitions cannot be restated atomically; at some point the old
+partition data must be swapped with the new partition data. When this
+swap occurs, usually by issuing an HDFS
+<code>rm</code>
+followed by a
+<code>mv</code>
+, the possibility exists where the data appears to be unavailable and
+hence any downstream jobs consuming the data might unexpectedly fail.
+Therefore data processing patterns that restate raw data on HDFS cannot
+operate robustly without some external mechanism to orchestrate
+concurrent access to changing data.
+</p>
+
+<p>
+The availability of ACID tables in Hive provides a mechanism that both
+enables concurrent access to data stored in HDFS (so long as it's in the
+ORC+ACID format), and also permits row-level mutation of records within
+a table, without the need to rewrite the existing data. But while Hive
+itself supports
+<code>INSERT</code>
+,
+<code>UPDATE</code>
+and
+<code>DELETE</code>
+commands, and the ORC format can support large batches of mutations in a
+transaction, Hive's execution engine currently submits each individual
+mutation operation in a separate transaction and issues table scans (M/R
+jobs) to execute them. It does not currently scale to the demands of
+processing large deltas in an atomic manner. Furthermore it would be
+advantageous to extend atomic batch mutation capabilities beyond Hive by
+making them available to other data processing frameworks. The Streaming
+Mutation API does just this.
+</p>
+
+<p>The Streaming Mutation API, although similar to the Streaming
+API, has a number of differences and is built to enable very different
+use cases. Superficially, the Streaming API can only write new data
+whereas the mutation API can also modify existing data. However, the two
+APIs are also based on very different transaction models. The Streaming API
+focuses on surfacing a continuous stream of new data into a Hive table
+and does so by batching small sets of writes into multiple short-lived
+transactions. Conversely the mutation API is designed to infrequently
+apply large sets of mutations to a data set in an atomic fashion; all
+mutations will either be applied or they will not. This instead mandates
+the use of a single long-lived transaction. This table summarises the
+attributes of each API:</p>
+
+<table border="1">
+<thead>
+<tr>
+<th>Attribute</th>
+<th>Streaming API</th>
+<th>Mutation API</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Ingest type</td>
+<td>Data arrives continuously</td>
+<td>Ingests are performed periodically and the mutations are
+applied in a single batch</td>
+</tr>
+<tr>
+<td>Transaction scope</td>
+<td>Transactions are created for small batches of writes</td>
+<td>The entire set of mutations should be applied within a single
+transaction</td>
+</tr>
+<tr>
+<td>Data availability</td>
+<td>Surfaces new data to users frequently and quickly</td>
+<td>Change sets should be applied atomically: either the effect of
+the delta is visible or it is not</td>
+</tr>
+<tr>
+<td>Sensitive to record order</td>
+<td>No, records do not have pre-existing lastTxnIds or bucketIds.
+Records are likely being written into a single partition (today's date
+for example)</td>
+<td>Yes, all mutated records have existing <code>RecordIdentifiers</code>
+and must be grouped by (partitionValues, bucketId) and sorted by
+lastTxnId. These record coordinates initially arrive in an order that is
+effectively random.
+</td>
+</tr>
+<tr>
+<td>Impact of a write failure</td>
+<td>Transaction can be aborted and producer can choose to resubmit
+failed records as ordering is not important.</td>
+<td>Ingest must be halted and the failed records
+resubmitted to preserve sequence.</td>
+</tr>
+<tr>
+<td>User perception of missing data</td>
+<td>Data has not arrived yet → "latency?"</td>
+<td>"This data is inconsistent, some records have been updated, but
+other related records have not" - consider here the classic transfer
+between bank accounts scenario</td>
+</tr>
+<tr>
+<td>API end point scope</td>
+<td>A given <code>HiveEndPoint</code> instance submits many
+transactions to a specific bucket, in a specific partition, of a
+specific table
+</td>
+<td>A set of <code>MutatorCoordinators</code> writes changes to an
+unknown set of buckets, of an unknown set of partitions, of specific
+tables (can be more than one), within a single transaction.
+</td>
+</tr>
+</tbody>
+</table>
+
+<h2>Structure</h2>
+<p>The API comprises two main concerns: transaction management, and
+the writing of mutation operations to the data set. The two concerns
+have a minimal coupling as it is expected that transactions will be
+initiated from a single job launcher type processes while the writing of
+mutations will be scaled out across any number of worker nodes. In the
+context of Hadoop M/R these can be more concretely defined as the Tool
+and Map/Reduce task components. However, use of this architecture is not
+mandated and in fact both concerns could be handled within a single
+simple process depending on the requirements.</p>
+
+<p>Note that a suitably configured Hive instance is required to
+operate this system even if you do not intend to access the data from
+within Hive. Internally, transactions are managed by the Hive MetaStore.
+Mutations are performed to HDFS via ORC APIs that bypass the MetaStore.
+Additionally you may wish to configure your MetaStore instance to
+perform periodic data compactions.</p>
+
+<p>
+<b>Note on packaging</b>: The APIs are defined in the <b>org.apache.hive.hcatalog.streaming.mutate</b>
+Java package and are included in the hive-hcatalog-streaming jar.
+</p>
+
+<h2>Data requirements</h2>
+<p>
+Generally speaking, to apply a mutation to a record one must have some
+unique key that identifies the record. However, primary keys are not a
+construct provided by Hive. Internally Hive uses
+<code>RecordIdentifiers</code>
+stored in a virtual
+<code>ROW__ID</code>
+column to uniquely identify records within an ACID table. Therefore,
+any process that wishes to issue mutations to a table via this API must
+have available the corresponding row ids for the target records. What
+this means in practice is that the process issuing mutations must first
+read in a current snapshot of the data and then join the mutations on some
+domain specific primary key to obtain the corresponding Hive
+<code>ROW__ID</code>
+. This is effectively what occurs within Hive's table scan process when
+an
+<code>UPDATE</code>
+or
+<code>DELETE</code>
+statement is executed. The
+<code>AcidInputFormat</code>
+provides access to this data via
+<code>AcidRecordReader.getRecordIdentifier()</code>
+.
+</p>
+
+<p>
+The implementation of the ACID format places some constraints on the
+order in which records are written and it is important that this
+ordering is enforced. Additionally, data must be grouped appropriately
+to adhere to the constraints imposed by the
+<code>OrcRecordUpdater</code>
+. Grouping also makes it possible to parallelise the writing of mutations
+for the purposes of scaling. Finally, to correctly bucket new records
+(inserts) there is a slightly unintuitive trick that must be applied.
+</p>
+
+<p>All of these data sequencing concerns are the responsibility of
+the client process calling the API which is assumed to have first class
+grouping and sorting capabilities (Hadoop Map/Reduce etc.). The streaming
+API provides nothing more than validators that fail fast when they
+encounter groups and records that are out of sequence.</p>
+
+<p>In short, API client processes should prepare data for the mutate
+API like so:</p>
+<ul>
+<li><b>MUST:</b> Order records by <code>ROW__ID.originalTxn</code>,
+then <code>ROW__ID.rowId</code>.</li>
+<li><b>MUST:</b> Assign a <code>ROW__ID</code> containing a
+computed <code>bucketId</code> to records to be inserted.</li>
+<li><b>SHOULD:</b> Group/partition by table partition value, then <code>ROW__ID.bucketId</code>.</li>
+</ul>
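+<p>As an illustrative sketch only (the surrounding grouping and sorting job is your own code,
+and the method name here is hypothetical), the required sort order can be expressed as a
+comparison over the pre-existing record identifiers:</p>
+<pre>
+// Orders mutations by ROW__ID.originalTxn, then ROW__ID.rowId.
+static int compareMutations(RecordIdentifier a, RecordIdentifier b) {
+  int byTxnId = Long.compare(a.getTransactionId(), b.getTransactionId());
+  return byTxnId != 0 ? byTxnId : Long.compare(a.getRowId(), b.getRowId());
+}
+</pre>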
+
+<p>
+The addition of bucket ids to insert records prior to grouping and
+sorting seems unintuitive. However, it is required both to ensure
+adequate partitioning of new data and bucket allocation consistent with
+that provided by Hive. In a typical ETL the majority of mutation events
+are inserts, often targeting a single partition (new data for the
+previous day, hour, etc.). If more than one worker is writing said
+events, were we to leave the bucket id empty then all inserts would go
+to a single worker (e.g: reducer) and the workload could be heavily
+skewed. The assignment of a computed bucket allows inserts to be more
+usefully distributed across workers. Additionally, when Hive is working
+with the data it may expect records to have been bucketed in a way that
+is consistent with its own internal scheme. A convenience type and
+implementation are provided to more easily compute and append bucket ids:
+<code>BucketIdResolver</code>
+and
+<code>BucketIdResolverImpl</code>
+.
+</p>
+
+<p>Update operations should not attempt to modify values of
+partition or bucketing columns. The API does not prevent this and such
+attempts could lead to data corruption.</p>
+
+<h2>Streaming requirements</h2>
+<p>A few things are currently required to use streaming.</p>
+
+<p>
+<ol>
+<li>Currently, only ORC storage format is supported. So '<b>stored
+as orc</b>' must be specified during table creation.
+</li>
+<li>The hive table must be bucketed, but not sorted. So something
+like '<b>clustered by (<i>colName</i>) into <i>10</i> buckets
+</b>' must be specified during table creation.
+</li>
+<li>User of the client streaming process must have the necessary
+permissions to write to the table or partition and create partitions in
+the table.</li>
+<li>Settings required in hive-site.xml for Metastore:
+<ol>
+<li><b>hive.txn.manager =
+org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</b></li>
+<li><b>hive.support.concurrency = true </b></li>
+<li><b>hive.compactor.initiator.on = true</b></li>
+<li><b>hive.compactor.worker.threads > 0 </b></li>
+</ol>
+</li>
+</ol>
+</p>
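+<p>For example, a table compatible with this API might be declared as follows (database, table,
+and column names are illustrative only):</p>
+<pre>
+CREATE TABLE example_db.example_table (
+  id INT,
+  msg STRING
+)
+PARTITIONED BY (continent STRING)
+CLUSTERED BY (id) INTO 10 BUCKETS
+STORED AS ORC
+TBLPROPERTIES ('transactional' = 'true');
+</pre>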
+
+<p>
+<b>Note:</b> Streaming mutations to <b>unpartitioned</b> tables is also
+supported.
+</p>
+
+<h2>Record layout</h2>
+<p>
+The structure, layout, and encoding of records is the exclusive concern
+of the client ETL mutation process and may be quite different from the
+target Hive ACID table. The mutation API requires concrete
+implementations of the
+<code>MutatorFactory</code>
+and
+<code>Mutator</code>
+classes to extract pertinent data from records and serialize data into
+the ACID files. Fortunately base classes are provided (
+<code>AbstractMutator</code>
+,
+<code>RecordInspectorImpl</code>
+) to simplify this effort and usually all that is required is the
+specification of a suitable
+<code>ObjectInspector</code>
+and the provision of the indexes of the
+<code>ROW__ID</code>
+and bucketed columns within the record structure. Note that all column
+indexes in these classes are with respect to your record structure, not
+the Hive table structure.
+</p>
+<p>
+You will likely also want to use a
+<code>BucketIdResolver</code>
+to append bucket ids to new records for insertion. Fortunately the core
+implementation is provided in
+<code>BucketIdResolverImpl</code>
+but note that bucket column indexes must be presented in the same order
+as they are in the Hive table definition to ensure consistent bucketing.
+Note that you cannot move records between buckets and an exception will
+be thrown if you attempt to do so. In real terms this means that you
+should not attempt to modify the values in bucket columns with an
+<code>UPDATE</code>
+.
+</p>
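+<p>A minimal sketch of computing and attaching bucket ids follows; the object inspector, record
+variable, column indexes, and bucket count are assumptions that must match your own record
+structure and table definition:</p>
+<pre>
+// ROW__ID lives at index 2 of the record; column 0 is the single bucketed column;
+// the table was declared with 10 buckets.
+BucketIdResolver resolver = new BucketIdResolverImpl(objectInspector, 2, 10, new int[] { 0 });
+Object bucketedRecord = resolver.attachBucketIdToRecord(record);
+</pre>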
+
+<h2>Connection and Transaction management</h2>
+<p>
+The
+<code>MutatorClient</code>
+class is used to create and manage transactions in which mutations can
+be performed. The scope of a transaction can extend across multiple ACID
+tables. When a client connects it communicates with the meta store to
+verify and acquire meta data for the target tables. An invocation of
+<code>newTransaction</code>
+then opens a transaction with the meta store, finalizes a collection of
+<code>AcidTables</code>
+and returns a new
+<code>Transaction</code>
+instance. The acid tables are light-weight, serializable objects that
+are used by the mutation writing components of the API to target
+specific ACID file locations. Usually your
+<code>MutatorClient</code>
+will be running on some master node and your coordinators on worker
+nodes. In this event the
+<code>AcidTableSerializer</code>
+can be used to encode the tables in a more transportable form, for use
+as a
+<code>Configuration</code>
+property for example.
+</p>
+<p>
+As you would expect, a
+<code>Transaction</code>
+must be initiated with a call to
+<code>begin</code>
+before any mutations can be applied. This invocation acquires a lock on
+the targeted tables using the meta store, and initiates a heartbeat to
+prevent transaction timeouts. It is highly recommended that you register
+a
+<code>LockFailureListener</code>
+with the client so that your process can handle any lock or transaction
+failures. Typically you may wish to abort the job in the event of such
+an error. With the transaction in place you can now start streaming
+mutations with one or more
+<code>MutatorCoordinator</code>
+instances (more on this later), and can finally
+<code>commit</code>
+or
+<code>abort</code>
+the transaction when the change set has been applied, which will release
+the lock with the meta store client. Finally you should
+<code>close</code>
+the mutation client to release any held resources.
+</p>
+<p>
+The
+<code>MutatorClientBuilder</code>
+is provided to simplify the construction of clients.
+</p>
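+<p>A sketch of the client lifecycle is shown below. Of the builder methods, only
+<code>addTable(String, String, boolean)</code> is documented here; the other builder and
+accessor names are assumptions:</p>
+<pre>
+MutatorClient client = new MutatorClientBuilder()
+  .metaStoreUri(metaStoreUri)                    // assumed builder option
+  .lockFailureListener(listener)                 // strongly recommended (see above)
+  .addTable("example_db", "example_table", true) // true: create partitions as needed
+  .build();
+client.connect();
+Transaction transaction = client.newTransaction();
+List&lt;AcidTable&gt; tables = client.getTables();  // assumed accessor
+transaction.begin();
+// ... apply mutations with one or more MutatorCoordinators ...
+transaction.commit();                            // or transaction.abort()
+client.close();
+</pre>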
+
+<p>
+<b>WARNING:</b> Hive doesn't currently have a deadlock detector (it is
+being worked on as part of <a
+href="https://issues.apache.org/jira/browse/HIVE-9675">HIVE-9675</a>).
+This API could potentially deadlock with other stream writers or with
+SQL users.
+</p>
+<h2>Writing data</h2>
+
+<p>
+The
+<code>MutatorCoordinator</code>
+class is used to issue mutations to an ACID table. You will require at
+least one instance per table participating in the transaction. The
+target of a given instance is defined by the respective
+<code>AcidTable</code>
+used to construct the coordinator. It is recommended that a
+<code>MutatorCoordinatorBuilder</code>
+is used to simplify the construction process.
+</p>
+
+<p>
+Mutations can be applied by invoking the respective
+<code>insert</code>
+,
+<code>update</code>
+, and
+<code>delete</code>
+methods on the coordinator. These methods each take as parameters the
+target partition of the record and the mutated record. In the case of an
+unpartitioned table you should simply pass an empty list as the
+partition value. For inserts specifically, only the bucket id will be
+extracted from the
+<code>RecordIdentifier</code>
+, the transactionId and rowId will be ignored and replaced by
+appropriate values in the
+<code>RecordUpdater</code>
+. Additionally, in the case of deletes, everything but the
+<code>RecordIdentifier</code>
+in the record will be ignored and therefore it is often easier to simply
+submit the original record.
+</p>
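+<p>A minimal sketch of issuing mutations with a coordinator built by the
+<code>MutatorCoordinatorBuilder</code> (construction options elided; the partition value and
+record variables are illustrative):</p>
+<pre>
+List partition = Arrays.asList("Europe");      // pass an empty list for unpartitioned tables
+coordinator.insert(partition, newRecord);      // bucket id must already be attached
+coordinator.update(partition, changedRecord);  // the ROW__ID identifies the target row
+coordinator.delete(partition, originalRecord); // only the ROW__ID is significant
+coordinator.flush();
+coordinator.close();
+</pre>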
+
+<p>
+<b>Caution:</b> As mentioned previously, mutations must arrive in
+specific order for the resultant table data to be consistent.
+Coordinators will verify a naturally ordered sequence of
+(lastTransactionId, rowId) and will throw an exception if this sequence
+is broken. This exception should almost certainly be escalated so that
+the transaction is aborted. This, along with the correct ordering of the
+data, is the responsibility of the client using the API.
+</p>
+
+<h3>Dynamic Partition Creation:</h3>
+It is very likely to be desirable to have new partitions created
+automatically (say on an hourly basis). In such cases requiring the Hive
+admin to pre-create the necessary partitions may not be reasonable.
+Consequently the API allows coordinators to create partitions as needed
+(see:
+<code>MutatorClientBuilder.addTable(String, String, boolean)</code>
+). Partition creation being an atomic action, multiple coordinators can
+race to create the partition, but only one would succeed, so
+coordinator clients need not synchronize when creating a partition. The
+user of the coordinator process needs to be given write permissions on
+the Hive table in order to create partitions.
+
+<h2>Reading data</h2>
+
+<p>
+Although this API is concerned with writing changes to data, as
+previously stated we'll almost certainly have to read the existing data
+first to obtain the relevant
+<code>ROW__IDs</code>
+. Therefore it is worth noting that reading ACID data in a robust and
+consistent manner requires the following:
+<ol>
+<li>Obtaining a valid transaction list from the meta store (<code>ValidTxnList</code>).
+</li>
+<li>Acquiring a read-lock with the meta store and issuing
+heartbeats (<code>Lock</code> can help with this; see the sketch below).
+</li>
+<li>Configuring the <code>OrcInputFormat</code> and then reading
+the data. Make sure that you also pull in the <code>ROW__ID</code>
+values. See: <code>AcidRecordReader.getRecordIdentifier</code>.
+</li>
+<li>Releasing the read-lock.</li>
+</ol>
+</p>
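+<p>Steps 2 and 4 can be handled with the <code>Lock</code> class directly; a sketch (the
+database, table, and user names are illustrative):</p>
+<pre>
+Lock lock = new Lock(metaStoreClient, new Lock.Options()
+  .addTable("example_db", "example_table")
+  .user("etl-user")
+  .configuration(hiveConf));
+lock.acquire(); // shared read lock with heartbeat; no transaction required
+try {
+  // ... read the snapshot via OrcInputFormat, capturing ROW__IDs ...
+} finally {
+  lock.release();
+}
+</pre>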
+
+<h2>Example</h2>
+<p>
+<img src="doc-files/system-overview.png" />
+</p>
+<p>So to recap, the sequence of events required to apply mutations
+to a dataset using the API is:</p>
+<ol>
+<li>Create a <code>MutatorClient</code> to manage a transaction for
+the targeted ACID tables. This set of tables should include any
+transactional destinations or sources. Don't forget to register a <code>LockFailureListener</code>
+so that you can handle transaction failures.
+</li>
+<li>Open a new <code>Transaction</code> with the client.
+</li>
+<li>Get the <code>AcidTables</code> from the client.
+</li>
+<li>Begin the transaction.</li>
+<li>Create at least one <code>MutatorCoordinator</code> for each
+table. The <code>AcidTableSerializer</code> can help you transport the <code>AcidTables</code>
+when your workers are in a distributed environment.
+</li>
+<li>Compute your mutation set (this is your ETL merge process).</li>
+<li>Append bucket ids to insertion records. A <code>BucketIdResolver</code>
+can help here.
+</li>
+<li>Group and sort your data appropriately.</li>
+<li>Issue mutation events to your coordinators.</li>
+<li>Close your coordinators.</li>
+<li>Abort or commit the transaction.</li>
+<li>Close your mutation client.</li>
+</ol>
+<p>
+See
+<code>ExampleUseCase</code>
+and
+<code>TestMutations.testUpdatesAndDeletes()</code>
+for some very simple usages.
+</p>
+
+</body>
+
+</html>
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdException.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdException.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdException.java
new file mode 100644
index 0000000..656324c
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdException.java
@@ -0,0 +1,11 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+public class BucketIdException extends WorkerException {
+
+ private static final long serialVersionUID = 1L;
+
+ BucketIdException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolver.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolver.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolver.java
new file mode 100644
index 0000000..dab2072
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolver.java
@@ -0,0 +1,11 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+/** Computes and appends bucket ids to records that are due to be inserted. */
+public interface BucketIdResolver {
+
+ Object attachBucketIdToRecord(Object record);
+
+ /** See: {@link org.apache.hadoop.hive.ql.exec.ReduceSinkOperator#computeBucketNumber(Object, int)}. */
+ int computeBucketId(Object record);
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolverImpl.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolverImpl.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolverImpl.java
new file mode 100644
index 0000000..dbed9e1
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolverImpl.java
@@ -0,0 +1,76 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+/**
+ * Implementation of a {@link BucketIdResolver} that includes the logic required to calculate a bucket id from a record
+ * that is consistent with Hive's own internal computation scheme.
+ */
+public class BucketIdResolverImpl implements BucketIdResolver {
+
+ private static final long INVALID_TRANSACTION_ID = -1L;
+ private static final long INVALID_ROW_ID = -1L;
+
+ private final SettableStructObjectInspector structObjectInspector;
+ private final StructField[] bucketFields;
+ private final int totalBuckets;
+ private final StructField recordIdentifierField;
+
+ /**
+ * Note that all column indexes are with respect to your record structure, not the Hive table structure. Bucket column
+ * indexes must be presented in the same order as they are in the Hive table definition.
+ */
+ public BucketIdResolverImpl(ObjectInspector objectInspector, int recordIdColumn, int totalBuckets, int[] bucketColumns) {
+ this.totalBuckets = totalBuckets;
+ if (!(objectInspector instanceof SettableStructObjectInspector)) {
+ throw new IllegalArgumentException("Serious problem, expected a StructObjectInspector, " + "but got a "
+ + objectInspector.getClass().getName());
+ }
+
+ if (bucketColumns.length < 1) {
+ throw new IllegalArgumentException("No bucket column indexes set.");
+ }
+ structObjectInspector = (SettableStructObjectInspector) objectInspector;
+ List<? extends StructField> structFields = structObjectInspector.getAllStructFieldRefs();
+
+ recordIdentifierField = structFields.get(recordIdColumn);
+
+ bucketFields = new StructField[bucketColumns.length];
+ for (int i = 0; i < bucketColumns.length; i++) {
+ int bucketColumnsIndex = bucketColumns[i];
+ bucketFields[i] = structFields.get(bucketColumnsIndex);
+ }
+ }
+
+ @Override
+ public Object attachBucketIdToRecord(Object record) {
+ int bucketId = computeBucketId(record);
+ RecordIdentifier recordIdentifier = new RecordIdentifier(INVALID_TRANSACTION_ID, bucketId, INVALID_ROW_ID);
+ structObjectInspector.setStructFieldData(record, recordIdentifierField, recordIdentifier);
+ return record;
+ }
+
+ /** Based on: {@link org.apache.hadoop.hive.ql.exec.ReduceSinkOperator#computeBucketNumber(Object, int)}. */
+ @Override
+ public int computeBucketId(Object record) {
+ int bucketId = 1;
+
+ for (int columnIndex = 0; columnIndex < bucketFields.length; columnIndex++) {
+ Object columnValue = structObjectInspector.getStructFieldData(record, bucketFields[columnIndex]);
+ bucketId = bucketId * 31 + ObjectInspectorUtils.hashCode(columnValue, bucketFields[columnIndex].getFieldObjectInspector());
+ }
+
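+ // Normalize negative hashes before applying the modulus so the result is a valid bucket index.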
+ if (bucketId < 0) {
+ bucketId = -1 * bucketId;
+ }
+
+ return bucketId % totalBuckets;
+ }
+
+}
[2/3] hive git commit: HIVE-10165 Improve hive-hcatalog-streaming
extensibility and support updates and deletes (Eliot West via gates)
Posted by ga...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/CreatePartitionHelper.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/CreatePartitionHelper.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/CreatePartitionHelper.java
new file mode 100644
index 0000000..9aab346
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/CreatePartitionHelper.java
@@ -0,0 +1,83 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import java.util.List;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Utility class that can create new table partitions within the {@link IMetaStoreClient meta store}. */
+class CreatePartitionHelper {
+
+ private static final Logger LOG = LoggerFactory.getLogger(CreatePartitionHelper.class);
+
+ private final IMetaStoreClient metaStoreClient;
+ private final String databaseName;
+ private final String tableName;
+
+ CreatePartitionHelper(IMetaStoreClient metaStoreClient, String databaseName, String tableName) {
+ this.metaStoreClient = metaStoreClient;
+ this.databaseName = databaseName;
+ this.tableName = tableName;
+ }
+
+ /** Returns the expected {@link Path} for a given partition value. */
+ Path getPathForPartition(List<String> newPartitionValues) throws WorkerException {
+ try {
+ String location;
+ if (newPartitionValues.isEmpty()) {
+ location = metaStoreClient.getTable(databaseName, tableName).getSd().getLocation();
+ } else {
+ location = metaStoreClient.getPartition(databaseName, tableName, newPartitionValues).getSd().getLocation();
+ }
+ LOG.debug("Found path {} for partition {}", location, newPartitionValues);
+ return new Path(location);
+ } catch (NoSuchObjectException e) {
+ throw new WorkerException("Table not found '" + databaseName + "." + tableName + "'.", e);
+ } catch (TException e) {
+ throw new WorkerException("Failed to get path for partitions '" + newPartitionValues + "' on table '"
+ + databaseName + "." + tableName + "' with meta store: " + metaStoreClient, e);
+ }
+ }
+
+ /** Creates the specified partition if it does not already exist. Does nothing if the table is unpartitioned. */
+ void createPartitionIfNotExists(List<String> newPartitionValues) throws WorkerException {
+ if (newPartitionValues.isEmpty()) {
+ return;
+ }
+
+ try {
+ LOG.debug("Attempting to create partition (if not exists) {}.{}:{}", databaseName, tableName, newPartitionValues);
+ Table table = metaStoreClient.getTable(databaseName, tableName);
+
+ Partition partition = new Partition();
+ partition.setDbName(table.getDbName());
+ partition.setTableName(table.getTableName());
+ StorageDescriptor partitionSd = new StorageDescriptor(table.getSd());
+ partitionSd.setLocation(table.getSd().getLocation() + Path.SEPARATOR
+ + Warehouse.makePartName(table.getPartitionKeys(), newPartitionValues));
+ partition.setSd(partitionSd);
+ partition.setValues(newPartitionValues);
+
+ metaStoreClient.add_partition(partition);
+ } catch (AlreadyExistsException e) {
+ LOG.debug("Partition already exisits: {}.{}:{}", databaseName, tableName, newPartitionValues);
+ } catch (NoSuchObjectException e) {
+ LOG.error("Failed to create partition : " + newPartitionValues, e);
+ throw new PartitionCreationException("Table not found '" + databaseName + "." + tableName + "'.", e);
+ } catch (TException e) {
+ LOG.error("Failed to create partition : " + newPartitionValues, e);
+ throw new PartitionCreationException("Failed to create partition '" + newPartitionValues + "' on table '"
+ + databaseName + "." + tableName + "'", e);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/GroupRevisitedException.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/GroupRevisitedException.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/GroupRevisitedException.java
new file mode 100644
index 0000000..f8e46d6
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/GroupRevisitedException.java
@@ -0,0 +1,11 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+public class GroupRevisitedException extends WorkerException {
+
+ private static final long serialVersionUID = 1L;
+
+ GroupRevisitedException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/GroupingValidator.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/GroupingValidator.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/GroupingValidator.java
new file mode 100644
index 0000000..8ae3904
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/GroupingValidator.java
@@ -0,0 +1,74 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * Tracks the (partition, bucket) combinations that have been encountered, checking that a group is not revisited.
+ * Potentially memory intensive.
+ */
+class GroupingValidator {
+
+ private final Map<String, Set<Integer>> visited;
+ private final StringBuilder partitionKeyBuilder;
+ private long groups;
+ private String lastPartitionKey;
+ private int lastBucketId = -1;
+
+ GroupingValidator() {
+ visited = new HashMap<String, Set<Integer>>();
+ partitionKeyBuilder = new StringBuilder(64);
+ }
+
+ /**
+ * Checks that this group is either the same as the last or is a new group.
+ */
+ boolean isInSequence(List<String> partitionValues, int bucketId) {
+ String partitionKey = getPartitionKey(partitionValues);
+ if (Objects.equals(lastPartitionKey, partitionKey) && lastBucketId == bucketId) {
+ return true;
+ }
+ lastPartitionKey = partitionKey;
+ lastBucketId = bucketId;
+
+ Set<Integer> bucketIdSet = visited.get(partitionKey);
+ if (bucketIdSet == null) {
+ // If the bucket id set component of this data structure proves to be too large there is the
+ // option of moving it to Trove or HPPC in an effort to reduce size.
+ bucketIdSet = new HashSet<>();
+ visited.put(partitionKey, bucketIdSet);
+ }
+
+ boolean newGroup = bucketIdSet.add(bucketId);
+ if (newGroup) {
+ groups++;
+ }
+ return newGroup;
+ }
+
+ private String getPartitionKey(List<String> partitionValues) {
+ partitionKeyBuilder.setLength(0);
+ boolean first = true;
+ for (String element : partitionValues) {
+ if (first) {
+ first = false;
+ } else {
+ partitionKeyBuilder.append('/');
+ }
+ partitionKeyBuilder.append(element);
+ }
+ String partitionKey = partitionKeyBuilder.toString();
+ return partitionKey;
+ }
+
+ @Override
+ public String toString() {
+ return "GroupingValidator [groups=" + groups + ",lastPartitionKey=" + lastPartitionKey + ",lastBucketId="
+ + lastBucketId + "]";
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/Mutator.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/Mutator.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/Mutator.java
new file mode 100644
index 0000000..96ecce9
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/Mutator.java
@@ -0,0 +1,21 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import java.io.Closeable;
+import java.io.Flushable;
+import java.io.IOException;
+
+/**
+ * Interface for submitting mutation events to a given partition and bucket in an ACID table. Requires records to arrive
+ * in the order defined by the {@link SequenceValidator}.
+ */
+public interface Mutator extends Closeable, Flushable {
+
+ void insert(Object record) throws IOException;
+
+ void update(Object record) throws IOException;
+
+ void delete(Object record) throws IOException;
+
+ void flush() throws IOException;
+
+}
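
As a shape reference only, a hypothetical no-op implementation of the interface
(not part of this patch), useful for dry runs that exercise grouping and ordering
without writing deltas:

    package org.apache.hive.hcatalog.streaming.mutate.worker;

    import java.io.IOException;

    /** Hypothetical no-op Mutator: accepts and discards all events. */
    public class NoOpMutator implements Mutator {
      @Override
      public void insert(Object record) throws IOException {
        // discard the insert event
      }

      @Override
      public void update(Object record) throws IOException {
        // discard the update event
      }

      @Override
      public void delete(Object record) throws IOException {
        // discard the delete event
      }

      @Override
      public void flush() throws IOException {
        // nothing is buffered, so nothing to flush
      }

      @Override
      public void close() throws IOException {
        // no resources are held
      }
    }
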
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorCoordinator.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorCoordinator.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorCoordinator.java
new file mode 100644
index 0000000..96f05e5
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorCoordinator.java
@@ -0,0 +1,281 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import java.io.Closeable;
+import java.io.Flushable;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hive.hcatalog.streaming.mutate.client.AcidTable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Orchestrates the application of an ordered sequence of mutation events to a given ACID table. Events must be grouped
+ * by partition, then bucket, and ordered by origTxnId, then rowId. Ordering is enforced by the {@link SequenceValidator}
+ * and grouping by the {@link GroupingValidator}. An ACID delta file is created for each combination of partition and
+ * bucket id (a single transaction id is implied). Once a delta file has been closed it cannot be reopened, so care is
+ * needed to group the data correctly; otherwise failures will occur when writing to a group whose delta has already
+ * been closed. The {@link MutatorCoordinator} will seamlessly handle transitions between groups, creating and
+ * closing {@link Mutator Mutators} as needed to write to the appropriate partition and bucket. New partitions will be
+ * created in the meta store if {@link AcidTable#createPartitions()} is set.
+ * <p/>
+ * {@link #insert(List, Object) Insert} events must be artificially assigned appropriate bucket ids in the preceding
+ * grouping phase so that they are grouped correctly. Note that any transaction id or row id assigned to the
+ * {@link RecordIdentifier RecordIdentifier} of such events will be ignored by both the coordinator and the underlying
+ * {@link RecordUpdater}.
+ */
+public class MutatorCoordinator implements Closeable, Flushable {
+
+ private static final Logger LOG = LoggerFactory.getLogger(MutatorCoordinator.class);
+
+ private final IMetaStoreClient metaStoreClient;
+ private final MutatorFactory mutatorFactory;
+ private final GroupingValidator groupingValidator;
+ private final SequenceValidator sequenceValidator;
+ private final AcidTable table;
+ private final RecordInspector recordInspector;
+ private final CreatePartitionHelper partitionHelper;
+ private final AcidOutputFormat<?, ?> outputFormat;
+ private final BucketIdResolver bucketIdResolver;
+ private final HiveConf configuration;
+ private final boolean deleteDeltaIfExists;
+
+ private int bucketId;
+ private List<String> partitionValues;
+ private Path partitionPath;
+ private Mutator mutator;
+
+ MutatorCoordinator(IMetaStoreClient metaStoreClient, HiveConf configuration, MutatorFactory mutatorFactory,
+ AcidTable table, boolean deleteDeltaIfExists) throws WorkerException {
+ this(metaStoreClient, configuration, mutatorFactory, new CreatePartitionHelper(metaStoreClient,
+ table.getDatabaseName(), table.getTableName()), new GroupingValidator(), new SequenceValidator(), table,
+ deleteDeltaIfExists);
+ }
+
+ /** Visible for testing only. */
+ MutatorCoordinator(IMetaStoreClient metaStoreClient, HiveConf configuration, MutatorFactory mutatorFactory,
+ CreatePartitionHelper partitionHelper, GroupingValidator groupingValidator, SequenceValidator sequenceValidator,
+ AcidTable table, boolean deleteDeltaIfExists) throws WorkerException {
+ this.metaStoreClient = metaStoreClient;
+ this.configuration = configuration;
+ this.mutatorFactory = mutatorFactory;
+ this.partitionHelper = partitionHelper;
+ this.groupingValidator = groupingValidator;
+ this.sequenceValidator = sequenceValidator;
+ this.table = table;
+ this.deleteDeltaIfExists = deleteDeltaIfExists;
+ this.recordInspector = this.mutatorFactory.newRecordInspector();
+ bucketIdResolver = this.mutatorFactory.newBucketIdResolver(table.getTotalBuckets());
+
+ bucketId = -1;
+ outputFormat = createOutputFormat(table.getOutputFormatName(), configuration);
+ }
+
+ /**
+ * We expect records grouped by (partitionValues,bucketId) and ordered by (origTxnId,rowId).
+ *
+ * @throws BucketIdException The bucket ID in the {@link RecordIdentifier} of the record does not match that computed
+ * using the values in the record's bucketed columns.
+ * @throws RecordSequenceException A record was submitted that was not in the correct ascending (origTxnId, rowId)
+ * sequence.
+ * @throws GroupRevisitedException If an event was submitted for a (partition, bucketId) combination that has already
+ * been closed.
+ * @throws PartitionCreationException Could not create a new partition in the meta store.
+ * @throws WorkerException
+ */
+ public void insert(List<String> partitionValues, Object record) throws WorkerException {
+ reconfigureState(OperationType.INSERT, partitionValues, record);
+ try {
+ mutator.insert(record);
+ LOG.debug("Inserted into partition={}, record={}", partitionValues, record);
+ } catch (IOException e) {
+ throw new WorkerException("Failed to insert record '" + record + " using mutator '" + mutator + "'.", e);
+ }
+ }
+
+ /**
+ * We expect records grouped by (partitionValues,bucketId) and ordered by (origTxnId,rowId).
+ *
+ * @throws BucketIdException The bucket ID in the {@link RecordIdentifier} of the record does not match that computed
+ * using the values in the record's bucketed columns.
+ * @throws RecordSequenceException A record was submitted that was not in the correct ascending (origTxnId, rowId)
+ * sequence.
+ * @throws GroupRevisitedException If an event was submitted for a (partition, bucketId) combination that has already
+ * been closed.
+ * @throws PartitionCreationException Could not create a new partition in the meta store.
+ * @throws WorkerException
+ */
+ public void update(List<String> partitionValues, Object record) throws WorkerException {
+ reconfigureState(OperationType.UPDATE, partitionValues, record);
+ try {
+ mutator.update(record);
+ LOG.debug("Updated in partition={}, record={}", partitionValues, record);
+ } catch (IOException e) {
+ throw new WorkerException("Failed to update record '" + record + " using mutator '" + mutator + "'.", e);
+ }
+ }
+
+ /**
+ * We expect records grouped by (partitionValues,bucketId) and ordered by (origTxnId,rowId).
+ *
+ * @throws BucketIdException The bucket ID in the {@link RecordIdentifier} of the record does not match that computed
+ * using the values in the record's bucketed columns.
+ * @throws RecordSequenceException A record was submitted that was not in the correct ascending (origTxnId, rowId)
+ * sequence.
+ * @throws GroupRevisitedException If an event was submitted for a (partition, bucketId) combination that has already
+ * been closed.
+ * @throws PartitionCreationException Could not create a new partition in the meta store.
+ * @throws WorkerException
+ */
+ public void delete(List<String> partitionValues, Object record) throws WorkerException {
+ reconfigureState(OperationType.DELETE, partitionValues, record);
+ try {
+ mutator.delete(record);
+ LOG.debug("Deleted from partition={}, record={}", partitionValues, record);
+ } catch (IOException e) {
+ throw new WorkerException("Failed to delete record '" + record + " using mutator '" + mutator + "'.", e);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ if (mutator != null) {
+ mutator.close();
+ }
+ } finally {
+ metaStoreClient.close();
+ }
+ }
+
+ @Override
+ public void flush() throws IOException {
+ if (mutator != null) {
+ mutator.flush();
+ }
+ }
+
+ private void reconfigureState(OperationType operationType, List<String> newPartitionValues, Object record)
+ throws WorkerException {
+ RecordIdentifier newRecordIdentifier = extractRecordIdentifier(operationType, newPartitionValues, record);
+ int newBucketId = newRecordIdentifier.getBucketId();
+
+ if (newPartitionValues == null) {
+ newPartitionValues = Collections.emptyList();
+ }
+
+ try {
+ if (partitionHasChanged(newPartitionValues)) {
+ if (table.createPartitions()) {
+ partitionHelper.createPartitionIfNotExists(newPartitionValues);
+ }
+ Path newPartitionPath = partitionHelper.getPathForPartition(newPartitionValues);
+ resetMutator(newBucketId, newPartitionValues, newPartitionPath);
+ } else if (bucketIdHasChanged(newBucketId)) {
+ resetMutator(newBucketId, partitionValues, partitionPath);
+ } else {
+ validateRecordSequence(operationType, newRecordIdentifier);
+ }
+ } catch (IOException e) {
+ throw new WorkerException("Failed to reset mutator when performing " + operationType + " of record: " + record, e);
+ }
+ }
+
+ private RecordIdentifier extractRecordIdentifier(OperationType operationType, List<String> newPartitionValues,
+ Object record) throws BucketIdException {
+ RecordIdentifier recordIdentifier = recordInspector.extractRecordIdentifier(record);
+ int computedBucketId = bucketIdResolver.computeBucketId(record);
+ if (operationType != OperationType.DELETE && recordIdentifier.getBucketId() != computedBucketId) {
+ throw new BucketIdException("RecordIdentifier.bucketId != computed bucketId (" + computedBucketId
+ + ") for record " + recordIdentifier + " in partition " + newPartitionValues + ".");
+ }
+ return recordIdentifier;
+ }
+
+ private void resetMutator(int newBucketId, List<String> newPartitionValues, Path newPartitionPath)
+ throws IOException, GroupRevisitedException {
+ if (mutator != null) {
+ mutator.close();
+ }
+ validateGrouping(newPartitionValues, newBucketId);
+ sequenceValidator.reset();
+ if (deleteDeltaIfExists) {
+ // TODO: Should this be the concern of the mutator?
+ deleteDeltaIfExists(newPartitionPath, table.getTransactionId(), newBucketId);
+ }
+ mutator = mutatorFactory.newMutator(outputFormat, table.getTransactionId(), newPartitionPath, newBucketId);
+ bucketId = newBucketId;
+ partitionValues = newPartitionValues;
+ partitionPath = newPartitionPath;
+ LOG.debug("Reset mutator: bucketId={}, partition={}, partitionPath={}", bucketId, partitionValues, partitionPath);
+ }
+
+ private boolean partitionHasChanged(List<String> newPartitionValues) {
+ boolean partitionHasChanged = !Objects.equals(this.partitionValues, newPartitionValues);
+ if (partitionHasChanged) {
+ LOG.debug("Partition changed from={}, to={}", this.partitionValues, newPartitionValues);
+ }
+ return partitionHasChanged;
+ }
+
+ private boolean bucketIdHasChanged(int newBucketId) {
+ boolean bucketIdHasChanged = this.bucketId != newBucketId;
+ if (bucketIdHasChanged) {
+ LOG.debug("Bucket ID changed from={}, to={}", this.bucketId, newBucketId);
+ }
+ return bucketIdHasChanged;
+ }
+
+ private void validateGrouping(List<String> newPartitionValues, int newBucketId) throws GroupRevisitedException {
+ if (!groupingValidator.isInSequence(newPartitionValues, newBucketId)) {
+ throw new GroupRevisitedException("Group out of sequence: state=" + groupingValidator + ", partition="
+ + newPartitionValues + ", bucketId=" + newBucketId);
+ }
+ }
+
+ private void validateRecordSequence(OperationType operationType, RecordIdentifier newRecordIdentifier)
+ throws RecordSequenceException {
+ boolean identifierOutOfSequence = operationType != OperationType.INSERT
+ && !sequenceValidator.isInSequence(newRecordIdentifier);
+ if (identifierOutOfSequence) {
+ throw new RecordSequenceException("Records not in sequence: state=" + sequenceValidator + ", recordIdentifier="
+ + newRecordIdentifier);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ private AcidOutputFormat<?, ?> createOutputFormat(String outputFormatName, HiveConf configuration)
+ throws WorkerException {
+ try {
+ return (AcidOutputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(outputFormatName), configuration);
+ } catch (ClassNotFoundException e) {
+ throw new WorkerException("Could not locate class for '" + outputFormatName + "'.", e);
+ }
+ }
+
+ private void deleteDeltaIfExists(Path partitionPath, long transactionId, int bucketId) throws IOException {
+ Path deltaPath = AcidUtils.createFilename(partitionPath,
+ new AcidOutputFormat.Options(configuration)
+ .bucket(bucketId)
+ .minimumTransactionId(transactionId)
+ .maximumTransactionId(transactionId));
+ FileSystem fileSystem = deltaPath.getFileSystem(configuration);
+ if (fileSystem.exists(deltaPath)) {
+ LOG.info("Deleting existing delta path: {}", deltaPath);
+ fileSystem.delete(deltaPath, false);
+ }
+ }
+
+}
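
A sketch of the grouping and ordering contract described in the class Javadoc.
Coordinator construction is elided, and recordWithId is a hypothetical helper that
returns a record whose RecordIdentifier carries the given (origTxnId, bucketId,
rowId):

    List<String> asia = Arrays.asList("asia", "india");
    List<String> europe = Arrays.asList("europe", "france");

    coordinator.update(asia, recordWithId(1L, 0, 0L));   // opens group (asia, bucket 0)
    coordinator.delete(asia, recordWithId(1L, 0, 3L));   // rowId ascends within the group
    coordinator.update(europe, recordWithId(1L, 0, 2L)); // new group; (asia, bucket 0) is now closed
    // coordinator.update(asia, recordWithId(1L, 0, 4L)) would throw GroupRevisitedException here
    coordinator.close();
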
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorCoordinatorBuilder.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorCoordinatorBuilder.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorCoordinatorBuilder.java
new file mode 100644
index 0000000..8851ea6
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorCoordinatorBuilder.java
@@ -0,0 +1,76 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.streaming.mutate.HiveConfFactory;
+import org.apache.hive.hcatalog.streaming.mutate.UgiMetaStoreClientFactory;
+import org.apache.hive.hcatalog.streaming.mutate.client.AcidTable;
+
+/** Convenience class for building {@link MutatorCoordinator} instances. */
+public class MutatorCoordinatorBuilder {
+
+ private HiveConf configuration;
+ private MutatorFactory mutatorFactory;
+ private UserGroupInformation authenticatedUser;
+ private String metaStoreUri;
+ private AcidTable table;
+ private boolean deleteDeltaIfExists;
+
+ public MutatorCoordinatorBuilder configuration(HiveConf configuration) {
+ this.configuration = configuration;
+ return this;
+ }
+
+ public MutatorCoordinatorBuilder authenticatedUser(UserGroupInformation authenticatedUser) {
+ this.authenticatedUser = authenticatedUser;
+ return this;
+ }
+
+ public MutatorCoordinatorBuilder metaStoreUri(String metaStoreUri) {
+ this.metaStoreUri = metaStoreUri;
+ return this;
+ }
+
+ /** Set the destination ACID table for this client. */
+ public MutatorCoordinatorBuilder table(AcidTable table) {
+ this.table = table;
+ return this;
+ }
+
+ /**
+ * If the delta file already exists, delete it. This is useful in a MapReduce setting where a number of task retries
+ * will attempt to write the same delta file.
+ */
+ public MutatorCoordinatorBuilder deleteDeltaIfExists() {
+ this.deleteDeltaIfExists = true;
+ return this;
+ }
+
+ public MutatorCoordinatorBuilder mutatorFactory(MutatorFactory mutatorFactory) {
+ this.mutatorFactory = mutatorFactory;
+ return this;
+ }
+
+ public MutatorCoordinator build() throws WorkerException, MetaException {
+ String user = authenticatedUser == null ? System.getProperty("user.name") : authenticatedUser.getShortUserName();
+ boolean secureMode = authenticatedUser == null ? false : authenticatedUser.hasKerberosCredentials();
+
+ configuration = HiveConfFactory.newInstance(configuration, this.getClass(), metaStoreUri);
+
+ IMetaStoreClient metaStoreClient;
+ try {
+ metaStoreClient = new UgiMetaStoreClientFactory(metaStoreUri, configuration, authenticatedUser, user, secureMode)
+ .newInstance(HCatUtil.getHiveMetastoreClient(configuration));
+ } catch (IOException e) {
+ throw new WorkerException("Could not create meta store client.", e);
+ }
+
+ return new MutatorCoordinator(metaStoreClient, configuration, mutatorFactory, table, deleteDeltaIfExists);
+ }
+
+}
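
Typical construction, sketched with placeholder values (the metastore URI is
illustrative; the AcidTable and MutatorFactory are assumed to be in scope, the
former obtained from the MutatorClient on the job client side):

    import org.apache.hadoop.hive.metastore.api.MetaException;
    import org.apache.hive.hcatalog.streaming.mutate.client.AcidTable;
    import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator;
    import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder;
    import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory;
    import org.apache.hive.hcatalog.streaming.mutate.worker.WorkerException;

    public class CoordinatorSketch {
      MutatorCoordinator newCoordinator(AcidTable acidTable, MutatorFactory mutatorFactory)
          throws WorkerException, MetaException {
        return new MutatorCoordinatorBuilder()
            .metaStoreUri("thrift://metastore.example.com:9083") // placeholder URI
            .table(acidTable)
            .mutatorFactory(mutatorFactory)
            .deleteDeltaIfExists() // tolerate task retries that rewrite the same delta
            .build();
      }
    }
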
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorFactory.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorFactory.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorFactory.java
new file mode 100644
index 0000000..850054f
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorFactory.java
@@ -0,0 +1,16 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+
+public interface MutatorFactory {
+
+ Mutator newMutator(AcidOutputFormat<?, ?> outputFormat, long transactionId, Path partitionPath, int bucketId) throws IOException;
+
+ RecordInspector newRecordInspector();
+
+ BucketIdResolver newBucketIdResolver(int totalBuckets);
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorImpl.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorImpl.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorImpl.java
new file mode 100644
index 0000000..0fe41d5
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/MutatorImpl.java
@@ -0,0 +1,84 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/** Base {@link Mutator} implementation. Creates a suitable {@link RecordUpdater} and delegates mutation events. */
+public class MutatorImpl implements Mutator {
+
+ private final long transactionId;
+ private final Path partitionPath;
+ private final int bucketId;
+ private final Configuration configuration;
+ private final int recordIdColumn;
+ private final ObjectInspector objectInspector;
+ private RecordUpdater updater;
+
+ public MutatorImpl(Configuration configuration, int recordIdColumn, ObjectInspector objectInspector,
+ AcidOutputFormat<?, ?> outputFormat, long transactionId, Path partitionPath, int bucketId) throws IOException {
+ this.configuration = configuration;
+ this.recordIdColumn = recordIdColumn;
+ this.objectInspector = objectInspector;
+ this.transactionId = transactionId;
+ this.partitionPath = partitionPath;
+ this.bucketId = bucketId;
+
+ updater = createRecordUpdater(outputFormat);
+ }
+
+ @Override
+ public void insert(Object record) throws IOException {
+ updater.insert(transactionId, record);
+ }
+
+ @Override
+ public void update(Object record) throws IOException {
+ updater.update(transactionId, record);
+ }
+
+ @Override
+ public void delete(Object record) throws IOException {
+ updater.delete(transactionId, record);
+ }
+
+ /**
+ * This implementation intentionally does nothing at this time. We use only a single transaction and
+ * {@link OrcRecordUpdater#flush()} will purposely throw an exception in this instance. We keep this here in the
+ * event that we support multiple transactions, and to make it clear that the omission of an invocation of
+ * {@link OrcRecordUpdater#flush()} was not a mistake.
+ */
+ @Override
+ public void flush() throws IOException {
+ // Intentionally do nothing
+ }
+
+ @Override
+ public void close() throws IOException {
+ updater.close(false);
+ updater = null;
+ }
+
+ @Override
+ public String toString() {
+ return "ObjectInspectorMutator [transactionId=" + transactionId + ", partitionPath=" + partitionPath
+ + ", bucketId=" + bucketId + "]";
+ }
+
+ protected RecordUpdater createRecordUpdater(AcidOutputFormat<?, ?> outputFormat) throws IOException {
+ return outputFormat.getRecordUpdater(
+ partitionPath,
+ new AcidOutputFormat.Options(configuration)
+ .inspector(objectInspector)
+ .bucket(bucketId)
+ .minimumTransactionId(transactionId)
+ .maximumTransactionId(transactionId)
+ .recordIdColumn(recordIdColumn));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/OperationType.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/OperationType.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/OperationType.java
new file mode 100644
index 0000000..5ecb1bb
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/OperationType.java
@@ -0,0 +1,7 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+enum OperationType {
+ INSERT,
+ UPDATE,
+ DELETE;
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/PartitionCreationException.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/PartitionCreationException.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/PartitionCreationException.java
new file mode 100644
index 0000000..5b59e01
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/PartitionCreationException.java
@@ -0,0 +1,15 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+public class PartitionCreationException extends WorkerException {
+
+ private static final long serialVersionUID = 1L;
+
+ PartitionCreationException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ PartitionCreationException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordInspector.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordInspector.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordInspector.java
new file mode 100644
index 0000000..11ef0dd
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordInspector.java
@@ -0,0 +1,11 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+
+/** Provide a means to extract {@link RecordIdentifier} from record objects. */
+public interface RecordInspector {
+
+ /** Get the {@link RecordIdentifier} from the record - to be used for updates and deletes only. */
+ RecordIdentifier extractRecordIdentifier(Object record);
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordInspectorImpl.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordInspectorImpl.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordInspectorImpl.java
new file mode 100644
index 0000000..18ee458
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordInspectorImpl.java
@@ -0,0 +1,46 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * Standard {@link RecordInspector} implementation that uses the supplied {@link ObjectInspector} and
+ * {@link AcidOutputFormat.Options#recordIdColumn(int) record id column} to extract {@link RecordIdentifier
+ * RecordIdentifiers} from records.
+ */
+public class RecordInspectorImpl implements RecordInspector {
+
+ private final StructObjectInspector structObjectInspector;
+ private final StructField recordIdentifierField;
+
+ /**
+ * Note that all column indexes are with respect to your record structure, not the Hive table structure.
+ */
+ public RecordInspectorImpl(ObjectInspector objectInspector, int recordIdColumn) {
+ if (!(objectInspector instanceof StructObjectInspector)) {
+ throw new IllegalArgumentException("Serious problem, expected a StructObjectInspector, " + "but got a "
+ + objectInspector.getClass().getName());
+ }
+
+ structObjectInspector = (StructObjectInspector) objectInspector;
+ List<? extends StructField> structFields = structObjectInspector.getAllStructFieldRefs();
+ recordIdentifierField = structFields.get(recordIdColumn);
+ }
+
+ @Override
+ public RecordIdentifier extractRecordIdentifier(Object record) {
+ return (RecordIdentifier) structObjectInspector.getStructFieldData(record, recordIdentifierField);
+ }
+
+ @Override
+ public String toString() {
+ return "RecordInspectorImpl [structObjectInspector=" + structObjectInspector + ", recordIdentifierField="
+ + recordIdentifierField + "]";
+ }
+
+}
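
A hypothetical usage sketch pairing RecordInspectorImpl with a reflection
ObjectInspector and the MutableRecord test class defined later in this patch
(rowId is column 2 of that class):

    import org.apache.hadoop.hive.ql.io.RecordIdentifier;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hive.hcatalog.streaming.mutate.MutableRecord;
    import org.apache.hive.hcatalog.streaming.mutate.worker.RecordInspector;
    import org.apache.hive.hcatalog.streaming.mutate.worker.RecordInspectorImpl;

    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        MutableRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    RecordInspector recordInspector = new RecordInspectorImpl(inspector, 2);

    MutableRecord record = new MutableRecord(1, "hello", new RecordIdentifier(1L, 0, 0L));
    RecordIdentifier identifier = recordInspector.extractRecordIdentifier(record);
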
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordSequenceException.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordSequenceException.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordSequenceException.java
new file mode 100644
index 0000000..6b034f1
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/RecordSequenceException.java
@@ -0,0 +1,11 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+public class RecordSequenceException extends WorkerException {
+
+ private static final long serialVersionUID = 1L;
+
+ RecordSequenceException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/SequenceValidator.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/SequenceValidator.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/SequenceValidator.java
new file mode 100644
index 0000000..bcff4d6
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/SequenceValidator.java
@@ -0,0 +1,49 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Verifies that a sequence of {@link RecordIdentifier RecordIdentifiers} is in a valid order for insertion into an
+ * ACID delta file in a given partition and bucket.
+ */
+class SequenceValidator {
+
+ private static final Logger LOG = LoggerFactory.getLogger(SequenceValidator.class);
+
+ private Long lastTxId;
+ private Long lastRowId;
+
+ SequenceValidator() {
+ }
+
+ boolean isInSequence(RecordIdentifier recordIdentifier) {
+ if (lastTxId != null && recordIdentifier.getTransactionId() < lastTxId) {
+ LOG.debug("Non-sequential transaction ID. Expected >{}, recordIdentifier={}", lastTxId, recordIdentifier);
+ return false;
+ } else if (lastTxId != null && recordIdentifier.getTransactionId() == lastTxId && lastRowId != null
+ && recordIdentifier.getRowId() <= lastRowId) {
+ LOG.debug("Non-sequential row ID. Expected >{}, recordIdentifier={}", lastRowId, recordIdentifier);
+ return false;
+ }
+ lastTxId = recordIdentifier.getTransactionId();
+ lastRowId = recordIdentifier.getRowId();
+ return true;
+ }
+
+ /**
+ * Validator must be reset for each new partition and/or bucket.
+ */
+ void reset() {
+ lastTxId = null;
+ lastRowId = null;
+ LOG.debug("reset");
+ }
+
+ @Override
+ public String toString() {
+ return "SequenceValidator [lastTxId=" + lastTxId + ", lastRowId=" + lastRowId + "]";
+ }
+
+}
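
A minimal sketch of the SequenceValidator contract (hypothetical, same-package
code since the class is package-private; run with java -ea):

    package org.apache.hive.hcatalog.streaming.mutate.worker;

    import org.apache.hadoop.hive.ql.io.RecordIdentifier;

    class SequenceValidatorSketch {
      public static void main(String[] args) {
        SequenceValidator validator = new SequenceValidator();
        assert validator.isInSequence(new RecordIdentifier(1L, 0, 0L));  // first record: valid
        assert validator.isInSequence(new RecordIdentifier(1L, 0, 1L));  // rowId ascends within txn 1
        assert validator.isInSequence(new RecordIdentifier(2L, 0, 0L));  // txnId advances; rowId may restart
        assert !validator.isInSequence(new RecordIdentifier(1L, 0, 5L)); // txnId went backwards: invalid
      }
    }
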
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/WorkerException.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/WorkerException.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/WorkerException.java
new file mode 100644
index 0000000..1fa1998
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/WorkerException.java
@@ -0,0 +1,15 @@
+package org.apache.hive.hcatalog.streaming.mutate.worker;
+
+public class WorkerException extends Exception {
+
+ private static final long serialVersionUID = 1L;
+
+ WorkerException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ WorkerException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/ExampleUseCase.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/ExampleUseCase.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/ExampleUseCase.java
new file mode 100644
index 0000000..86d70d4
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/ExampleUseCase.java
@@ -0,0 +1,82 @@
+package org.apache.hive.hcatalog.streaming.mutate;
+
+import java.util.List;
+
+import org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient;
+import org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder;
+import org.apache.hive.hcatalog.streaming.mutate.client.AcidTable;
+import org.apache.hive.hcatalog.streaming.mutate.client.Transaction;
+import org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver;
+import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator;
+import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder;
+import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory;
+
+public class ExampleUseCase {
+
+ private String metaStoreUri;
+ private String databaseName;
+ private String tableName;
+ private boolean createPartitions = true;
+ private List<String> partitionValues1, partitionValues2, partitionValues3;
+ private Object record1, record2, record3;
+ private MutatorFactory mutatorFactory;
+
+ /* This is an illustration, not a functioning example. */
+ public void example() throws Exception {
+ // CLIENT/TOOL END
+ //
+ // Singleton instance in the job client
+
+ // Create a client to manage our transaction
+ MutatorClient client = new MutatorClientBuilder()
+ .addSinkTable(databaseName, tableName, createPartitions)
+ .metaStoreUri(metaStoreUri)
+ .build();
+
+ // Get the transaction
+ Transaction transaction = client.newTransaction();
+
+ // Get serializable details of the destination tables
+ List<AcidTable> tables = client.getTables();
+
+ transaction.begin();
+
+ // CLUSTER / WORKER END
+ //
+ // Job submitted to the cluster
+ //
+
+ BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(tables.get(0).getTotalBuckets());
+ record1 = bucketIdResolver.attachBucketIdToRecord(record1);
+
+ // --------------------------------------------------------------
+ // DATA SHOULD GET SORTED BY YOUR ETL/MERGE PROCESS HERE
+ //
+ // Group the data by (partitionValues, ROW__ID.bucketId)
+ // Order the groups by (ROW__ID.lastTransactionId, ROW__ID.rowId)
+ // --------------------------------------------------------------
+
+ // One of these runs at the output of each reducer
+ //
+ MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
+ .metaStoreUri(metaStoreUri)
+ .table(tables.get(0))
+ .mutatorFactory(mutatorFactory)
+ .build();
+
+ coordinator.insert(partitionValues1, record1);
+ coordinator.update(partitionValues2, record2);
+ coordinator.delete(partitionValues3, record3);
+
+ coordinator.close();
+
+ // CLIENT/TOOL END
+ //
+ // The tasks have completed, control is back at the tool
+
+ transaction.commit();
+
+ client.close();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/MutableRecord.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/MutableRecord.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/MutableRecord.java
new file mode 100644
index 0000000..0d87a31
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/MutableRecord.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hcatalog.streaming.mutate;
+
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.io.Text;
+
+public class MutableRecord {
+
+ // Column 0
+ public final int id;
+ // Column 1
+ public final Text msg;
+ // Column 2
+ public RecordIdentifier rowId;
+
+ public MutableRecord(int id, String msg, RecordIdentifier rowId) {
+ this.id = id;
+ this.msg = new Text(msg);
+ this.rowId = rowId;
+ }
+
+ public MutableRecord(int id, String msg) {
+ this.id = id;
+ this.msg = new Text(msg);
+ rowId = null;
+ }
+
+ @Override
+ public String toString() {
+ return "MutableRecord [id=" + id + ", msg=" + msg + ", rowId=" + rowId + "]";
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/ReflectiveMutatorFactory.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/ReflectiveMutatorFactory.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/ReflectiveMutatorFactory.java
new file mode 100644
index 0000000..2a851c8
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/ReflectiveMutatorFactory.java
@@ -0,0 +1,51 @@
+package org.apache.hive.hcatalog.streaming.mutate;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver;
+import org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolverImpl;
+import org.apache.hive.hcatalog.streaming.mutate.worker.Mutator;
+import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory;
+import org.apache.hive.hcatalog.streaming.mutate.worker.MutatorImpl;
+import org.apache.hive.hcatalog.streaming.mutate.worker.RecordInspector;
+import org.apache.hive.hcatalog.streaming.mutate.worker.RecordInspectorImpl;
+
+public class ReflectiveMutatorFactory implements MutatorFactory {
+
+ private final int recordIdColumn;
+ private final ObjectInspector objectInspector;
+ private final Configuration configuration;
+ private final int[] bucketColumnIndexes;
+
+ public ReflectiveMutatorFactory(Configuration configuration, Class<?> recordClass, int recordIdColumn,
+ int[] bucketColumnIndexes) {
+ this.configuration = configuration;
+ this.recordIdColumn = recordIdColumn;
+ this.bucketColumnIndexes = bucketColumnIndexes;
+ objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(recordClass,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+
+ @Override
+ public Mutator newMutator(AcidOutputFormat<?, ?> outputFormat, long transactionId, Path partitionPath, int bucketId)
+ throws IOException {
+ return new MutatorImpl(configuration, recordIdColumn, objectInspector, outputFormat, transactionId, partitionPath,
+ bucketId);
+ }
+
+ @Override
+ public RecordInspector newRecordInspector() {
+ return new RecordInspectorImpl(objectInspector, recordIdColumn);
+ }
+
+ @Override
+ public BucketIdResolver newBucketIdResolver(int totalBuckets) {
+ return new BucketIdResolverImpl(objectInspector, recordIdColumn, totalBuckets, bucketColumnIndexes);
+ }
+
+}
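
Wiring this factory up for the MutableRecord test class above: column 2 of the
record holds the RecordIdentifier and, assuming the destination table is
bucketed on 'id', column 0 is the bucketed column:

    Configuration configuration = new Configuration();
    MutatorFactory mutatorFactory =
        new ReflectiveMutatorFactory(configuration, MutableRecord.class, 2, new int[] { 0 });
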
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/StreamingAssert.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/StreamingAssert.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/StreamingAssert.java
new file mode 100644
index 0000000..477ed8c
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/StreamingAssert.java
@@ -0,0 +1,191 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hcatalog.streaming.mutate;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.io.AcidInputFormat.AcidRecordReader;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.thrift.TException;
+
+public class StreamingAssert {
+
+ public static class Factory {
+ private IMetaStoreClient metaStoreClient;
+ private final HiveConf conf;
+
+ public Factory(IMetaStoreClient metaStoreClient, HiveConf conf) {
+ this.metaStoreClient = metaStoreClient;
+ this.conf = conf;
+ }
+
+ public StreamingAssert newStreamingAssert(Table table) throws Exception {
+ return newStreamingAssert(table, Collections.<String> emptyList());
+ }
+
+ public StreamingAssert newStreamingAssert(Table table, List<String> partition) throws Exception {
+ return new StreamingAssert(metaStoreClient, conf, table, partition);
+ }
+ }
+
+ private Table table;
+ private List<String> partition;
+ private IMetaStoreClient metaStoreClient;
+ private Directory dir;
+ private ValidTxnList txns;
+ private List<AcidUtils.ParsedDelta> currentDeltas;
+ private long min;
+ private long max;
+ private Path partitionLocation;
+
+ StreamingAssert(IMetaStoreClient metaStoreClient, HiveConf conf, Table table, List<String> partition)
+ throws Exception {
+ this.metaStoreClient = metaStoreClient;
+ this.table = table;
+ this.partition = partition;
+
+ txns = metaStoreClient.getValidTxns();
+ partitionLocation = getPartitionLocation();
+ dir = AcidUtils.getAcidState(partitionLocation, conf, txns);
+ assertEquals(0, dir.getObsolete().size());
+ assertEquals(0, dir.getOriginalFiles().size());
+
+ currentDeltas = dir.getCurrentDirectories();
+ min = Long.MAX_VALUE;
+ max = Long.MIN_VALUE;
+ System.out.println("Files found: ");
+ for (AcidUtils.ParsedDelta parsedDelta : currentDeltas) {
+ System.out.println(parsedDelta.getPath().toString());
+ max = Math.max(parsedDelta.getMaxTransaction(), max);
+ min = Math.min(parsedDelta.getMinTransaction(), min);
+ }
+ }
+
+ public void assertExpectedFileCount(int expectedFileCount) {
+ assertEquals(expectedFileCount, currentDeltas.size());
+ }
+
+ public void assertNothingWritten() {
+ assertExpectedFileCount(0);
+ }
+
+ public void assertMinTransactionId(long expectedMinTransactionId) {
+ if (currentDeltas.isEmpty()) {
+ throw new AssertionError("No data");
+ }
+ assertEquals(expectedMinTransactionId, min);
+ }
+
+ public void assertMaxTransactionId(long expectedMaxTransactionId) {
+ if (currentDeltas.isEmpty()) {
+ throw new AssertionError("No data");
+ }
+ assertEquals(expectedMaxTransactionId, max);
+ }
+
+ List<Record> readRecords() throws Exception {
+ if (currentDeltas.isEmpty()) {
+ throw new AssertionError("No data");
+ }
+ InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
+ JobConf job = new JobConf();
+ job.set("mapred.input.dir", partitionLocation.toString());
+ job.set("bucket_count", Integer.toString(table.getSd().getNumBuckets()));
+ job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
+ InputSplit[] splits = inputFormat.getSplits(job, 1);
+ assertEquals(1, splits.length);
+
+ final AcidRecordReader<NullWritable, OrcStruct> recordReader = (AcidRecordReader<NullWritable, OrcStruct>) inputFormat
+ .getRecordReader(splits[0], job, Reporter.NULL);
+
+ NullWritable key = recordReader.createKey();
+ OrcStruct value = recordReader.createValue();
+
+ List<Record> records = new ArrayList<>();
+ while (recordReader.next(key, value)) {
+ RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
+ Record record = new Record(new RecordIdentifier(recordIdentifier.getTransactionId(),
+ recordIdentifier.getBucketId(), recordIdentifier.getRowId()), value.toString());
+ System.out.println(record);
+ records.add(record);
+ }
+ recordReader.close();
+ return records;
+ }
+
+ private Path getPartitionLocation() throws NoSuchObjectException, MetaException, TException {
+ Path location;
+ if (partition.isEmpty()) {
+ location = new Path(table.getSd().getLocation());
+ } else {
+ // TODO: calculate this instead. Just because we're writing to the location doesn't mean that it'll
+ // always be wanted in the meta store right away.
+ List<Partition> partitionEntries = metaStoreClient.listPartitions(table.getDbName(), table.getTableName(),
+ partition, (short) 1);
+ location = new Path(partitionEntries.get(0).getSd().getLocation());
+ }
+ return location;
+ }
+
+ public static class Record {
+ private RecordIdentifier recordIdentifier;
+ private String row;
+
+ Record(RecordIdentifier recordIdentifier, String row) {
+ this.recordIdentifier = recordIdentifier;
+ this.row = row;
+ }
+
+ public RecordIdentifier getRecordIdentifier() {
+ return recordIdentifier;
+ }
+
+ public String getRow() {
+ return row;
+ }
+
+ @Override
+ public String toString() {
+ return "Record [recordIdentifier=" + recordIdentifier + ", row=" + row + "]";
+ }
+
+ }
+
+}
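
A sketch of a typical assertion flow (the client, configuration, table, and
partition values are placeholders; readRecords() is package-private, so
record-level checks live in the same package):

    StreamingAssert.Factory assertionFactory = new StreamingAssert.Factory(metaStoreClient, conf);
    StreamingAssert streamingAssertions =
        assertionFactory.newStreamingAssert(table, Arrays.asList("asia", "india"));

    streamingAssertions.assertMinTransactionId(1L);
    streamingAssertions.assertMaxTransactionId(1L);
    streamingAssertions.assertExpectedFileCount(1);
    List<Record> records = streamingAssertions.readRecords();
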
http://git-wip-us.apache.org/repos/asf/hive/blob/994d98c0/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/StreamingTestUtils.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/StreamingTestUtils.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/StreamingTestUtils.java
new file mode 100644
index 0000000..f8c8537
--- /dev/null
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/StreamingTestUtils.java
@@ -0,0 +1,262 @@
+package org.apache.hive.hcatalog.streaming.mutate;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.thrift.TException;
+
+public class StreamingTestUtils {
+
+ public HiveConf newHiveConf(String metaStoreUri) {
+ HiveConf conf = new HiveConf(this.getClass());
+ conf.set("fs.raw.impl", RawFileSystem.class.getName());
+ if (metaStoreUri != null) {
+ conf.setVar(HiveConf.ConfVars.METASTOREURIS, metaStoreUri);
+ }
+ conf.setBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, true);
+ conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
+ return conf;
+ }
+
+ public void prepareTransactionDatabase(HiveConf conf) throws Exception {
+ TxnDbUtil.setConfValues(conf);
+ TxnDbUtil.cleanDb();
+ TxnDbUtil.prepDb();
+ }
+
+ public IMetaStoreClient newMetaStoreClient(HiveConf conf) throws Exception {
+ return new HiveMetaStoreClient(conf);
+ }
+
+ public static class RawFileSystem extends RawLocalFileSystem {
+ private static final URI NAME;
+ static {
+ try {
+ NAME = new URI("raw:///");
+ } catch (URISyntaxException se) {
+ throw new IllegalArgumentException("bad uri", se);
+ }
+ }
+
+ @Override
+ public URI getUri() {
+ return NAME;
+ }
+
+ @Override
+ public FileStatus getFileStatus(Path path) throws IOException {
+ File file = pathToFile(path);
+ if (!file.exists()) {
+ throw new FileNotFoundException("Can't find " + path);
+ }
+ // get close enough
+ short mod = 0;
+ if (file.canRead()) {
+ mod |= 0444;
+ }
+ if (file.canWrite()) {
+ mod |= 0200;
+ }
+ if (file.canExecute()) {
+ mod |= 0111;
+ }
+ return new FileStatus(file.length(), file.isDirectory(), 1, 1024, file.lastModified(), file.lastModified(),
+ FsPermission.createImmutable(mod), "owen", "users", path);
+ }
+ }
+
+ public static DatabaseBuilder databaseBuilder(File warehouseFolder) {
+ return new DatabaseBuilder(warehouseFolder);
+ }
+
+ public static class DatabaseBuilder {
+
+ private Database database;
+ private File warehouseFolder;
+
+ public DatabaseBuilder(File warehouseFolder) {
+ this.warehouseFolder = warehouseFolder;
+ database = new Database();
+ }
+
+ public DatabaseBuilder name(String name) {
+ database.setName(name);
+ File databaseFolder = new File(warehouseFolder, name + ".db");
+ String databaseLocation = "raw://" + databaseFolder.toURI().getPath();
+ database.setLocationUri(databaseLocation);
+ return this;
+ }
+
+ public Database dropAndCreate(IMetaStoreClient metaStoreClient) throws Exception {
+ if (metaStoreClient == null) {
+ throw new IllegalArgumentException();
+ }
+ try {
+ for (String table : metaStoreClient.listTableNamesByFilter(database.getName(), "", (short) -1)) {
+ metaStoreClient.dropTable(database.getName(), table, true, true);
+ }
+ metaStoreClient.dropDatabase(database.getName());
+ } catch (TException e) {
+ // the database or one of its tables may not exist yet; ignore and recreate below
+ }
+ metaStoreClient.createDatabase(database);
+ return database;
+ }
+
+ public Database build() {
+ return database;
+ }
+
+ }
+
+ public static TableBuilder tableBuilder(Database database) {
+ return new TableBuilder(database);
+ }
+
+ public static class TableBuilder {
+
+ private Table table;
+ private StorageDescriptor sd;
+ private SerDeInfo serDeInfo;
+ private Database database;
+ private List<List<String>> partitions;
+ private List<String> columnNames;
+ private List<String> columnTypes;
+ private List<String> partitionKeys;
+
+ public TableBuilder(Database database) {
+ this.database = database;
+ partitions = new ArrayList<>();
+ columnNames = new ArrayList<>();
+ columnTypes = new ArrayList<>();
+ partitionKeys = Collections.emptyList();
+ table = new Table();
+ table.setDbName(database.getName());
+ table.setTableType(TableType.MANAGED_TABLE.toString());
+ Map<String, String> tableParams = new HashMap<String, String>();
+ tableParams.put("transactional", Boolean.TRUE.toString());
+ table.setParameters(tableParams);
+
+ sd = new StorageDescriptor();
+ sd.setInputFormat(HiveInputFormat.class.getName());
+ sd.setOutputFormat(OrcOutputFormat.class.getName());
+ sd.setNumBuckets(1);
+ table.setSd(sd);
+
+ serDeInfo = new SerDeInfo();
+ serDeInfo.setParameters(new HashMap<String, String>());
+ serDeInfo.getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
+ serDeInfo.setSerializationLib(OrcSerde.class.getName());
+ sd.setSerdeInfo(serDeInfo);
+ }
+
+ public TableBuilder name(String name) {
+ sd.setLocation(database.getLocationUri() + Path.SEPARATOR + name);
+ table.setTableName(name);
+ serDeInfo.setName(name);
+ return this;
+ }
+
+ public TableBuilder buckets(int buckets) {
+ sd.setNumBuckets(buckets);
+ return this;
+ }
+
+ public TableBuilder addColumn(String columnName, String columnType) {
+ columnNames.add(columnName);
+ columnTypes.add(columnType);
+ return this;
+ }
+
+ public TableBuilder partitionKeys(String... partitionKeys) {
+ this.partitionKeys = Arrays.asList(partitionKeys);
+ return this;
+ }
+
+ public TableBuilder addPartition(String... partitionValues) {
+ partitions.add(Arrays.asList(partitionValues));
+ return this;
+ }
+
+ public TableBuilder addPartition(List<String> partitionValues) {
+ partitions.add(partitionValues);
+ return this;
+ }
+
+ public Table create(IMetaStoreClient metaStoreClient) throws Exception {
+ if (metaStoreClient == null) {
+ throw new IllegalArgumentException();
+ }
+ return internalCreate(metaStoreClient);
+ }
+
+ public Table build() throws Exception {
+ return internalCreate(null);
+ }
+
+ private Table internalCreate(IMetaStoreClient metaStoreClient) throws Exception {
+ List<FieldSchema> fields = new ArrayList<FieldSchema>(columnNames.size());
+ for (int i = 0; i < columnNames.size(); i++) {
+ fields.add(new FieldSchema(columnNames.get(i), columnTypes.get(i), ""));
+ }
+ sd.setCols(fields);
+
+ if (!partitionKeys.isEmpty()) {
+ List<FieldSchema> partitionFields = new ArrayList<FieldSchema>();
+ for (String partitionKey : partitionKeys) {
+ partitionFields.add(new FieldSchema(partitionKey, serdeConstants.STRING_TYPE_NAME, ""));
+ }
+ table.setPartitionKeys(partitionFields);
+ }
+ if (metaStoreClient != null) {
+ metaStoreClient.createTable(table);
+ }
+
+ for (List<String> partitionValues : partitions) {
+ Partition partition = new Partition();
+ partition.setDbName(database.getName());
+ partition.setTableName(table.getTableName());
+ StorageDescriptor partitionSd = new StorageDescriptor(table.getSd());
+ partitionSd.setLocation(table.getSd().getLocation() + Path.SEPARATOR
+ + Warehouse.makePartName(table.getPartitionKeys(), partitionValues));
+ partition.setSd(partitionSd);
+ partition.setValues(partitionValues);
+
+ if (metaStoreClient != null) {
+ metaStoreClient.add_partition(partition);
+ }
+ }
+ return table;
+ }
+ }
+
+}
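
Pulling the utilities together, a sketch of a typical test fixture (metaStoreUri
and warehouseFolder are placeholders):

    StreamingTestUtils testUtils = new StreamingTestUtils();
    HiveConf conf = testUtils.newHiveConf(metaStoreUri);
    testUtils.prepareTransactionDatabase(conf);
    IMetaStoreClient metaStoreClient = testUtils.newMetaStoreClient(conf);

    Database database = databaseBuilder(warehouseFolder).name("testing").dropAndCreate(metaStoreClient);
    Table table = tableBuilder(database)
        .name("partitioned_table")
        .addColumn("id", "int")
        .addColumn("msg", "string")
        .partitionKeys("continent", "country")
        .addPartition("asia", "india")
        .buckets(1)
        .create(metaStoreClient);
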