Posted to commits@hive.apache.org by am...@apache.org on 2013/03/14 09:09:31 UTC
svn commit: r1456360 [2/2] - in /hive/branches/HIVE-4115/ql/src:
java/org/apache/hadoop/hive/ql/cube/
java/org/apache/hadoop/hive/ql/cube/metadata/
test/org/apache/hadoop/hive/ql/cube/
test/org/apache/hadoop/hive/ql/cube/metadata/
Added: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/metadata/TestCubeMetastoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/metadata/TestCubeMetastoreClient.java?rev=1456360&view=auto
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/metadata/TestCubeMetastoreClient.java (added)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/metadata/TestCubeMetastoreClient.java Thu Mar 14 08:09:31 2013
@@ -0,0 +1,419 @@
+package org.apache.hadoop.hive.ql.cube.metadata;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
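+/**
+ * Tests CubeMetastoreClient end to end against the configured Hive
+ * metastore: cubes, fact tables and dimension tables are created, read
+ * back from their Hive table representations and compared for equality,
+ * and the storage tables and partitions they imply are verified.
+ */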
+public class TestCubeMetastoreClient {
+
+ private CubeMetastoreClient client;
+
+ // cube members
+ private Cube cube;
+ private Set<CubeMeasure> cubeMeasures;
+ private Set<CubeDimension> cubeDimensions;
+ private final String cubeName = "testCube";
+ private Date now;
+
+ @Before
+ public void setup() throws HiveException {
+ client = CubeMetastoreClient.getInstance(new HiveConf(this.getClass()));
+ now = new Date();
+
+ defineCube();
+ }
+
+ @After
+ public void teardown() {
+ if (client != null) {
+ client.close();
+ }
+ }
+
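+ /**
+  * Defines the shared test cube: four column measures (msr1-msr4), two
+  * expression measures (msr5, msr6), a location hierarchy and three
+  * standalone dimensions (base, referenced and inline).
+  */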
+ private void defineCube() {
+ cubeMeasures = new HashSet<CubeMeasure>();
+ cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr1", "int",
+ "first measure")));
+ cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr2", "float",
+ "second measure"),
+ null, "SUM", "RS"));
+ cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr3", "double",
+ "third measure"),
+ null, "MAX", null));
+ cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr4", "bigint",
+ "fourth measure"),
+ null, "COUNT", null));
+ cubeMeasures.add(new ExprMeasure(new FieldSchema("msr5", "double",
+ "fifth measure"),
+ "avg(msr1 + msr2)"));
+ cubeMeasures.add(new ExprMeasure(new FieldSchema("msr6", "bigint",
+ "sixth measure"),
+ "(msr1 + msr2)/ msr4", "", "SUM", "RS"));
+
+ cubeDimensions = new HashSet<CubeDimension>();
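+ // Location hierarchy, finest to coarsest: zipcode -> city -> state -> country -> region.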
+ List<BaseDimension> locationHierarchy = new ArrayList<BaseDimension>();
+ locationHierarchy.add(new ReferencedDimension(new FieldSchema("zipcode",
+ "int", "zip"), new TableReference("ziptable", "zipcode")));
+ locationHierarchy.add(new ReferencedDimension(new FieldSchema("cityid",
+ "int", "city"), new TableReference("citytable", "id")));
+ locationHierarchy.add(new ReferencedDimension(new FieldSchema("stateid",
+ "int", "state"), new TableReference("statetable", "id")));
+ locationHierarchy.add(new ReferencedDimension(new FieldSchema("countryid",
+ "int", "country"), new TableReference("countrytable", "id")));
+ List<String> regions = Arrays.asList("APAC", "EMEA", "USA");
+ locationHierarchy.add(new InlineDimension(new FieldSchema("regionname",
+ "string", "region"), regions));
+
+ cubeDimensions.add(new CubeDimension("location", locationHierarchy));
+ cubeDimensions.add(new CubeDimension("dim1", Arrays.asList(
+ new BaseDimension(new FieldSchema("dim1", "string", "basedim")))));
+ cubeDimensions.add(new CubeDimension("dim2", Arrays.asList(
+ (BaseDimension)(new ReferencedDimension(
+ new FieldSchema("dim2", "string", "ref dim"),
+ new TableReference("testdim2", "id"))))));
+ cubeDimensions.add(new CubeDimension("dim3", Arrays.asList(
+ (BaseDimension)(new InlineDimension(
+ new FieldSchema("region", "string", "region dim"), regions)))));
+ cube = new Cube(cubeName, cubeMeasures, cubeDimensions);
+ }
+
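+ /**
+  * Creating a cube should persist it as a Hive table from which an equal
+  * Cube instance can be reconstructed.
+  */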
+ @Test
+ public void testCube() throws Exception {
+ client.createCube(cubeName, cubeMeasures, cubeDimensions);
+ Assert.assertTrue(client.tableExists(cubeName));
+ Table cubeTbl = client.getHiveTable(cubeName);
+ Cube cube2 = new Cube(cubeTbl);
+ Assert.assertEquals(cube, cube2);
+ }
+
+ @Test
+ public void testCubeFact() throws Exception {
+ String factName = "testFact";
+ List<FieldSchema> factColumns = new ArrayList<FieldSchema>(
+ cubeMeasures.size());
+ for (CubeMeasure measure : cubeMeasures) {
+ factColumns.add(measure.getColumn());
+ }
+
+ // add one dimension of the cube
+ factColumns.add(new FieldSchema("zipcode","int", "zip"));
+
+ Map<String, List<UpdatePeriod>> updatePeriods =
+ new HashMap<String, List<UpdatePeriod>>();
+ Map<Storage, List<UpdatePeriod>> storageAggregatePeriods =
+ new HashMap<Storage, List<UpdatePeriod>>();
+ List<UpdatePeriod> updates = new ArrayList<UpdatePeriod>();
+ updates.add(UpdatePeriod.HOURLY);
+ updates.add(UpdatePeriod.DAILY);
+ Storage hdfsStorage = new HDFSStorage("C1",
+ TextInputFormat.class.getCanonicalName(),
+ HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+ storageAggregatePeriods.put(hdfsStorage, updates);
+ updatePeriods.put(hdfsStorage.getName(), updates);
+
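+ // Expected in-memory fact table; update periods are keyed by storage name.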
+ CubeFactTable cubeFact = new CubeFactTable(cubeName, factName, factColumns,
+ updatePeriods);
+
+ // create cube fact
+ client.createCubeFactTable(cubeName, factName, factColumns,
+ storageAggregatePeriods);
+ Assert.assertTrue(client.tableExists(factName));
+ Table cubeTbl = client.getHiveTable(factName);
+ CubeFactTable cubeFact2 = new CubeFactTable(cubeTbl);
+ Assert.assertEquals(cubeFact, cubeFact2);
+
+ // Assert for storage tables
+ for (Map.Entry<Storage, List<UpdatePeriod>> entry :
+ storageAggregatePeriods.entrySet()) {
+ List<UpdatePeriod> updatePeriodsList = entry.getValue();
+ for (UpdatePeriod period : updatePeriodsList) {
+ String storageTableName = MetastoreUtil.getFactStorageTableName(
+ factName, period, entry.getKey().getPrefix());
+ Assert.assertTrue(client.tableExists(storageTableName));
+ }
+ }
+
+ // Add an HOURLY partition for 'now' and verify it is registered.
+ client.addPartition(cubeFact, hdfsStorage, UpdatePeriod.HOURLY, now);
+ Assert.assertTrue(client.factPartitionExists(cubeFact, hdfsStorage,
+ UpdatePeriod.HOURLY, now));
+ }
+
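+ /**
+  * Fact table on a storage that declares an explicit "region" partition
+  * column, so partitions must be added with a matching partition spec in
+  * addition to the update-period timestamp.
+  */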
+ @Test
+ public void testCubeFactWithParts() throws Exception {
+ List<FieldSchema> factColumns = new ArrayList<FieldSchema>(
+ cubeMeasures.size());
+ String factNameWithPart = "testFactPart";
+
+ for (CubeMeasure measure : cubeMeasures) {
+ factColumns.add(measure.getColumn());
+ }
+
+ // add some dimensions of the cube
+ factColumns.add(new FieldSchema("dim1","string", "dim1"));
+ factColumns.add(new FieldSchema("dim2","string", "dim2"));
+
+ List<FieldSchema> factPartColumns = new ArrayList<FieldSchema>();
+ factPartColumns.add(new FieldSchema("region","string", "region part"));
+
+ Map<String, List<UpdatePeriod>> updatePeriods =
+ new HashMap<String, List<UpdatePeriod>>();
+ Map<Storage, List<UpdatePeriod>> storageAggregatePeriods =
+ new HashMap<Storage, List<UpdatePeriod>>();
+ List<UpdatePeriod> updates = new ArrayList<UpdatePeriod>();
+ updates.add(UpdatePeriod.HOURLY);
+ updates.add(UpdatePeriod.DAILY);
+ Storage hdfsStorageWithParts = new HDFSStorage("C1",
+ SequenceFileInputFormat.class.getCanonicalName(),
+ HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+ hdfsStorageWithParts.addToPartCols(factPartColumns.get(0));
+ storageAggregatePeriods.put(hdfsStorageWithParts, updates);
+ updatePeriods.put(hdfsStorageWithParts.getName(), updates);
+
+ CubeFactTable cubeFactWithParts = new CubeFactTable(cubeName,
+ factNameWithPart, factColumns, updatePeriods);
+ client.createCubeFactTable(cubeName, factNameWithPart, factColumns,
+ storageAggregatePeriods);
+ Assert.assertTrue(client.tableExists(factNameWithPart));
+ Table cubeTbl = client.getHiveTable(factNameWithPart);
+ CubeFactTable cubeFact2 = new CubeFactTable(cubeTbl);
+ Assert.assertEquals(cubeFactWithParts, cubeFact2);
+
+ // Assert for storage tables
+ for (Map.Entry<Storage, List<UpdatePeriod>> entry :
+ storageAggregatePeriods.entrySet()) {
+ List<UpdatePeriod> updatePeriodsList = entry.getValue();
+ for (UpdatePeriod period : updatePeriodsList) {
+ String storageTableName = MetastoreUtil.getFactStorageTableName(
+ factNameWithPart, period, entry.getKey().getPrefix());
+ Assert.assertTrue(client.tableExists(storageTableName));
+ }
+ }
+
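+ // Values for the storage's non-time partition columns.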
+ Map<String, String> partSpec = new HashMap<String, String>();
+ partSpec.put(factPartColumns.get(0).getName(), "APAC");
+ // Add an HOURLY partition for 'now' with the region part spec and verify it is registered.
+ client.addPartition(cubeFactWithParts, hdfsStorageWithParts,
+ UpdatePeriod.HOURLY, now, partSpec);
+ Assert.assertTrue(client.factPartitionExists(cubeFactWithParts,
+ hdfsStorageWithParts,
+ UpdatePeriod.HOURLY, now, partSpec));
+ }
+
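+ /**
+  * Fact table registered on two storages: C1 with an extra "region"
+  * partition column and C2 with time-only partitions. Partitions are
+  * added and verified independently on each storage.
+  */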
+ @Test
+ public void testCubeFactWithTwoStorages() throws Exception {
+ List<FieldSchema> factColumns = new ArrayList<FieldSchema>(
+ cubeMeasures.size());
+ String factName = "testFactTwoStorages";
+
+ for (CubeMeasure measure : cubeMeasures) {
+ factColumns.add(measure.getColumn());
+ }
+
+ // add some dimensions of the cube
+ factColumns.add(new FieldSchema("dim1","string", "dim1"));
+ factColumns.add(new FieldSchema("dim2","string", "dim2"));
+
+ List<FieldSchema> factPartColumns = new ArrayList<FieldSchema>();
+ factPartColumns.add(new FieldSchema("region","string", "region part"));
+
+ Map<String, List<UpdatePeriod>> updatePeriods =
+ new HashMap<String, List<UpdatePeriod>>();
+ Map<Storage, List<UpdatePeriod>> storageAggregatePeriods =
+ new HashMap<Storage, List<UpdatePeriod>>();
+ List<UpdatePeriod> updates = new ArrayList<UpdatePeriod>();
+ updates.add(UpdatePeriod.HOURLY);
+ updates.add(UpdatePeriod.DAILY);
+ Storage hdfsStorageWithParts = new HDFSStorage("C1",
+ SequenceFileInputFormat.class.getCanonicalName(),
+ HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+ hdfsStorageWithParts.addToPartCols(factPartColumns.get(0));
+ Storage hdfsStorageWithNoParts = new HDFSStorage("C2",
+ TextInputFormat.class.getCanonicalName(),
+ HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+
+ storageAggregatePeriods.put(hdfsStorageWithParts, updates);
+ storageAggregatePeriods.put(hdfsStorageWithNoParts, updates);
+ updatePeriods.put(hdfsStorageWithParts.getName(), updates);
+ updatePeriods.put(hdfsStorageWithNoParts.getName(), updates);
+
+ CubeFactTable cubeFactWithTwoStorages = new CubeFactTable(cubeName,
+ factName, factColumns, updatePeriods);
+ client.createCubeFactTable(cubeName, factName, factColumns,
+ storageAggregatePeriods);
+ Assert.assertTrue(client.tableExists(factName));
+ Table cubeTbl = client.getHiveTable(factName);
+ CubeFactTable cubeFact2 = new CubeFactTable(cubeTbl);
+ Assert.assertEquals(cubeFactWithTwoStorages, cubeFact2);
+
+ // Assert for storage tables
+ for (Map.Entry<Storage, List<UpdatePeriod>> entry :
+ storageAggregatePeriods.entrySet()) {
+ List<UpdatePeriod> updatePeriodsList = entry.getValue();
+ for (UpdatePeriod period : updatePeriodsList) {
+ String storageTableName = MetastoreUtil.getFactStorageTableName(
+ factName, period, entry.getKey().getPrefix());
+ Assert.assertTrue(client.tableExists(storageTableName));
+ }
+ }
+
+ Map<String, String> partSpec = new HashMap<String, String>();
+ partSpec.put(factPartColumns.get(0).getName(), "APAC");
+ // Add partitions on each storage: with the region part spec on C1, time-only on C2.
+ client.addPartition(cubeFactWithTwoStorages, hdfsStorageWithParts,
+ UpdatePeriod.HOURLY, now, partSpec);
+ Assert.assertTrue(client.factPartitionExists(cubeFactWithTwoStorages,
+ hdfsStorageWithParts,
+ UpdatePeriod.HOURLY, now, partSpec));
+
+ client.addPartition(cubeFactWithTwoStorages, hdfsStorageWithNoParts,
+ UpdatePeriod.HOURLY, now);
+ Assert.assertTrue(client.factPartitionExists(cubeFactWithTwoStorages,
+ hdfsStorageWithNoParts,
+ UpdatePeriod.HOURLY, now));
+ }
+
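+ /**
+  * Dimension table with an hourly snapshot dump period: its storage table
+  * is partitioned, and adding a partition also registers a "latest"
+  * partition.
+  */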
+ @Test
+ public void testCubeDim() throws Exception {
+ String dimName = "ziptable";
+
+ List<FieldSchema> dimColumns = new ArrayList<FieldSchema>();
+ dimColumns.add(new FieldSchema("zipcode", "int", "code"));
+ dimColumns.add(new FieldSchema("f1", "string", "field1"));
+ dimColumns.add(new FieldSchema("f2", "string", "field2"));
+ dimColumns.add(new FieldSchema("stateid", "int", "state id"));
+
+ Map<String, TableReference> dimensionReferences =
+ new HashMap<String, TableReference>();
+ dimensionReferences.put("stateid", new TableReference("statetable", "id"));
+
+ Map<Storage, UpdatePeriod> snapshotDumpPeriods =
+ new HashMap<Storage, UpdatePeriod>();
+ Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>();
+ Storage hdfsStorage = new HDFSStorage("C1",
+ TextInputFormat.class.getCanonicalName(),
+ HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+ snapshotDumpPeriods.put(hdfsStorage, UpdatePeriod.HOURLY);
+ dumpPeriods.put(hdfsStorage.getName(), UpdatePeriod.HOURLY);
+ CubeDimensionTable cubeDim = new CubeDimensionTable(dimName,
+ dimColumns, dimensionReferences, dumpPeriods);
+ client.createCubeDimensionTable(dimName, dimColumns, dimensionReferences,
+ snapshotDumpPeriods);
+ Assert.assertTrue(client.tableExists(dimName));
+ Table cubeTbl = client.getHiveTable(dimName);
+ CubeDimensionTable cubeDim2 = new CubeDimensionTable(cubeTbl);
+ Assert.assertEquals(cubeDim, cubeDim2);
+
+ // Assert for storage tables
+ for (Storage storage : snapshotDumpPeriods.keySet()) {
+ String storageTableName = MetastoreUtil.getDimStorageTableName(dimName,
+ storage.getPrefix());
+ Assert.assertTrue(client.tableExists(storageTableName));
+ }
+
+ // Add a snapshot partition for 'now'; both it and the latest partition should exist.
+ client.addPartition(cubeDim, hdfsStorage, now);
+ Assert.assertTrue(client.dimPartitionExists(cubeDim, hdfsStorage, now));
+ Assert.assertTrue(client.latestPartitionExists(cubeDim, hdfsStorage));
+ }
+
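+ /**
+  * Dimension table created without snapshot dump periods: its storage
+  * tables should be unpartitioned.
+  */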
+ @Test
+ public void testCubeDimWithoutDumps() throws Exception {
+ String dimName = "countrytable";
+
+ List<FieldSchema> dimColumns = new ArrayList<FieldSchema>();
+ dimColumns.add(new FieldSchema("id", "int", "code"));
+ dimColumns.add(new FieldSchema("name", "string", "field1"));
+ dimColumns.add(new FieldSchema("capital", "string", "field2"));
+ dimColumns.add(new FieldSchema("region", "string", "region name"));
+
+ Map<String, TableReference> dimensionReferences =
+ new HashMap<String, TableReference>();
+ dimensionReferences.put("stateid", new TableReference("statetable", "id"));
+
+ Storage hdfsStorage = new HDFSStorage("C1",
+ TextInputFormat.class.getCanonicalName(),
+ HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+ Set<Storage> storages = new HashSet<Storage>();
+ storages.add(hdfsStorage);
+ CubeDimensionTable cubeDim = new CubeDimensionTable(dimName,
+ dimColumns, dimensionReferences);
+ client.createCubeDimensionTable(dimName, dimColumns, dimensionReferences,
+ storages);
+ Assert.assertTrue(client.tableExists(dimName));
+ Table cubeTbl = client.getHiveTable(dimName);
+ CubeDimensionTable cubeDim2 = new CubeDimensionTable(cubeTbl);
+ Assert.assertEquals(cubeDim, cubeDim2);
+
+ // Assert for storage tables
+ for (Storage storage : storages) {
+ String storageTableName = MetastoreUtil.getDimStorageTableName(dimName,
+ storage.getPrefix());
+ Assert.assertTrue(client.tableExists(storageTableName));
+ Assert.assertFalse(client.getStorageTable(storageTableName).isPartitioned());
+ }
+ }
+
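+ /**
+  * Dimension table on two storages: C1 dumps hourly snapshots while C2
+  * has no dump period, so C2's storage table should be unpartitioned.
+  */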
+ @Test
+ public void testCubeDimWithTwoStorages() throws Exception {
+ String dimName = "citytable";
+
+ List<FieldSchema> dimColumns = new ArrayList<FieldSchema>();
+ dimColumns.add(new FieldSchema("id", "int", "code"));
+ dimColumns.add(new FieldSchema("name", "string", "field1"));
+ dimColumns.add(new FieldSchema("stateid", "int", "state id"));
+
+ Map<String, TableReference> dimensionReferences =
+ new HashMap<String, TableReference>();
+ dimensionReferences.put("stateid", new TableReference("statetable", "id"));
+
+ Storage hdfsStorage1 = new HDFSStorage("C1",
+ TextInputFormat.class.getCanonicalName(),
+ HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+ Storage hdfsStorage2 = new HDFSStorage("C2",
+ TextInputFormat.class.getCanonicalName(),
+ HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+ Map<Storage, UpdatePeriod> snapshotDumpPeriods =
+ new HashMap<Storage, UpdatePeriod>();
+ Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>();
+ snapshotDumpPeriods.put(hdfsStorage1, UpdatePeriod.HOURLY);
+ dumpPeriods.put(hdfsStorage1.getName(), UpdatePeriod.HOURLY);
+ snapshotDumpPeriods.put(hdfsStorage2, null);
+ dumpPeriods.put(hdfsStorage2.getName(), null);
+ CubeDimensionTable cubeDim = new CubeDimensionTable(dimName,
+ dimColumns, dimensionReferences, dumpPeriods);
+ client.createCubeDimensionTable(dimName, dimColumns, dimensionReferences,
+ snapshotDumpPeriods);
+ Assert.assertTrue(client.tableExists(dimName));
+ Table cubeTbl = client.getHiveTable(dimName);
+ CubeDimensionTable cubeDim2 = new CubeDimensionTable(cubeTbl);
+ Assert.assertEquals(cubeDim, cubeDim2);
+
+ // Assert for storage tables
+ String storageTableName1 = MetastoreUtil.getDimStorageTableName(dimName,
+ hdfsStorage1.getPrefix());
+ Assert.assertTrue(client.tableExists(storageTableName1));
+ String storageTableName2 = MetastoreUtil.getDimStorageTableName(dimName,
+ hdfsStorage2.getPrefix());
+ Assert.assertTrue(client.tableExists(storageTableName2));
+ Assert.assertFalse(client.getStorageTable(storageTableName2).isPartitioned());
+ }
+
+}