You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by pr...@apache.org on 2017/03/27 09:08:25 UTC
[6/7] lens git commit: LENS-1400: Convert CubeTestSetup to setup using xml files instead of code
http://git-wip-us.apache.org/repos/asf/lens/blob/112af59c/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
index 94d4b40..2d031f4 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
@@ -27,29 +27,36 @@ import static org.apache.lens.cube.metadata.UpdatePeriod.*;
import static org.testng.Assert.*;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.StringReader;
import java.util.*;
+import java.util.stream.Collectors;
+import javax.xml.bind.JAXBException;
+
+import org.apache.lens.api.ToXMLString;
+import org.apache.lens.api.jaxb.LensJAXBContext;
+import org.apache.lens.api.metastore.SchemaTraverser;
import org.apache.lens.cube.metadata.*;
-import org.apache.lens.cube.metadata.ExprColumn.ExprSpec;
-import org.apache.lens.cube.metadata.ReferencedDimAttribute.ChainRefCol;
import org.apache.lens.cube.metadata.timeline.EndsAndHolesPartitionTimeline;
import org.apache.lens.cube.metadata.timeline.PartitionTimeline;
import org.apache.lens.cube.metadata.timeline.StoreAllPartitionTimeline;
-import org.apache.lens.server.api.LensConfConstants;
import org.apache.lens.server.api.error.LensException;
import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.text.StrLookup;
+import org.apache.commons.lang3.text.StrSubstitutor;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.mapred.TextInputFormat;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -92,9 +99,6 @@ public class CubeTestSetup {
public static final String TEST_CUBE_NAME = "testCube";
public static final String DERIVED_CUBE_NAME = "derivedCube";
public static final String BASE_CUBE_NAME = "baseCube";
- public static final String DERIVED_CUBE_NAME1 = "der1";
- public static final String DERIVED_CUBE_NAME2 = "der2";
- public static final String DERIVED_CUBE_NAME3 = "der3";
private static String c0 = "C0";
private static String c1 = "C1";
@@ -511,1305 +515,16 @@ public class CubeTestSetup {
return expected.toString();
}
- Set<ExprColumn> exprs;
-
- private void createCube(CubeMetastoreClient client) throws HiveException, ParseException, LensException {
- cubeMeasures = new HashSet<CubeMeasure>();
- Map<String, String> tags = new HashMap<>();
- tags.put(MetastoreConstants.MEASURE_DATACOMPLETENESS_TAG, "tag1");
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr1", "int", "first measure"), null, null, null, null, null,
- null, null, null, null, tags));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr2", "float", "second measure"), "Measure2", null, "SUM",
- "RS"));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr21", "float", "second measure"), "Measure22", null, "SUM",
- "RS"));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr22", "float", "second measure"), "Measure22", null, "SUM",
- "RS"));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr3", "double", "third measure"), "Measure3", null, "MAX",
- null));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr4", "bigint", "fourth measure"), "Measure4", null, "COUNT",
- null));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr9", "bigint", "ninth measure"), null, null, null, null,
- null, null, null, null, null, tags));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("noAggrMsr", "bigint", "measure without a default aggregate"),
- "No aggregateMsr", null, null, null));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("newmeasure", "bigint", "measure available from now"),
- "New measure", null, null, null, NOW, null, 100.0));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr15", "int", "fifteenth measure"), "Measure15", null, "SUM",
- "RS"));
- String prefix = "union_join_ctx_";
- cubeMeasures.add(new ColumnMeasure(new FieldSchema(prefix + "msr1", "int", prefix + "first measure")));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema(prefix + "msr2", "int", prefix + "second measure")));
- cubeMeasures.add(new ColumnMeasure(new FieldSchema(prefix + "msr3", "int", prefix + "third measure")));
-
- cubeDimensions = new HashSet<CubeDimAttribute>();
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema(prefix + "cityid", "int", prefix + "the cityid ")));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema(prefix + "zipcode", "int", prefix + "the zipcode")));
-
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("d_time", "timestamp", "d time")));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("processing_time", "timestamp", "processing time")));
- List<CubeDimAttribute> locationHierarchy = new ArrayList<CubeDimAttribute>();
- locationHierarchy.add(new BaseDimAttribute(new FieldSchema("zipcode", "int", "zip")));
- locationHierarchy.add(new BaseDimAttribute(new FieldSchema("cityid", "int", "city")));
- locationHierarchy.add(new BaseDimAttribute(new FieldSchema("stateid", "int", "state")));
- locationHierarchy.add(new BaseDimAttribute(new FieldSchema("countryid", "int", "country")));
- List<String> regions = Arrays.asList("APAC", "EMEA", "USA");
- locationHierarchy.add(new BaseDimAttribute(new FieldSchema("regionname", "string", "region"), "regionname", null,
- null, null, null, regions));
-
- cubeDimensions.add(new HierarchicalDimAttribute("location", "Location hierarchy", locationHierarchy));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("dim1", "string", "basedim")));
- // Added for ambiguity test
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity")));
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("dim2", "int", "ref dim"), "dim2 refer",
- "dim2chain", "id", null, null, 0.0));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("cdim2", "int", "ref dim"), "Dim2 refer", NOW, null, null));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("urdimid", "int", "ref dim"), "urdim refer",
- null, null, 10.0));
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("unreachableName", "string", ""), "urdim name",
- "unreachableDim_chain", "name", null, null, 10.0));
- // denormalized reference
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("dim2big1", "bigint", "ref dim"), "dim2 refer",
- "dim2chain", "bigid1", null, null, 0.0));
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("dim2big2", "bigint", "ref dim"), "dim2 refer",
- "dim2chain", "bigid2", null, null, 0.0));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("dim2bignew", "bigint", "ref dim"), "Dim2 refer",
- NOW, null, null));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("test_time_dim_hour_id", "int", "ref dim"),
- "Timedim reference", null, null, null));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("test_time_dim_day_id", "int", "ref dim"),
- "Timedim reference", null, null, null));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("test_time_dim_hour_id2", "int", "ref dim")));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("test_time_dim_day_id2", "int", "ref dim")));
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("testDim3id", "string", "direct id to testdim3"),
- "dim3 refer", "dim3chain", "id", null, null, 0.0));
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("cityname", "string", "city name"),
- "city name", "cubecity", "name", null, null, 0.0));
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema(prefix + "cityname", "string", prefix + "city name"),
- prefix + "city name", "cubeCityJoinUnionCtx", "name", null, null, 0.0));
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("statename_cube", "string", "state name"),
- "state name", "cubestate", "name", null, null, 0.0));
- List<ChainRefCol> references = new ArrayList<>();
- references.add(new ChainRefCol("timedatechain1", "full_date"));
- references.add(new ChainRefCol("timehourchain1", "full_hour"));
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("test_time_dim", "date", "ref dim"),
- "Timedim full date", references, null, null, null, null));
- List<ChainRefCol> chainRefs = new ArrayList<>();
- chainRefs.add(new ChainRefCol("timehourchain2", "full_hour"));
- chainRefs.add(new ChainRefCol("timedatechain2", "full_date"));
- cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("test_time_dim2", "date", "chained dim"),
- "Timedim full date", chainRefs, null, null, null, null));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("cityid1", "int", "id to city"),
- "City1", null, null, null));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("cityid2", "int", "id to city"),
- "City2", null, null, null));
- cubeDimensions.add(new BaseDimAttribute(new FieldSchema("concatedcitystate", "string", "citystate"),
- "CityState", null, null, null));
-
- Map<String, JoinChain> joinChains = new HashMap<>();
- addCubeChains(joinChains, TEST_CUBE_NAME);
-
- exprs = new HashSet<ExprColumn>();
- exprs.add(new ExprColumn(new FieldSchema("avgmsr", "double", "avg measure"), "Avg Msr", "avg(msr1 + msr2)"));
- exprs.add(new ExprColumn(new FieldSchema("singlecolmsr2expr", "double", "measure2"), "Msr2", "msr2)"));
- exprs.add(new ExprColumn(new FieldSchema("singlecolmsr2qualifiedexpr", "double", "testcube.measure2"),
- "Msr2", "testcube.msr2"));
- exprs.add(new ExprColumn(new FieldSchema("singlecoldim1expr", "string", "dim1"), "dim1", "dim1)"));
- exprs.add(new ExprColumn(new FieldSchema("singlecoldim1qualifiedexpr", "string", "testcube.dim1"),
- "dim1", "testcube.dim1"));
- exprs.add(new ExprColumn(new FieldSchema("singlecolchainid", "string", "dim3chain.id"),
- "dim3chainid", "dim3chain.id)"));
- exprs.add(new ExprColumn(new FieldSchema("singlecolchainrefexpr", "string", "testcube.testDim3id"),
- "dim3chainid", "testcube.testDim3id"));
- exprs.add(new ExprColumn(new FieldSchema("singlecolchainfield", "string", "cubecity.name"),
- "cubecityname", "cubecity.name"));
- exprs.add(new ExprColumn(new FieldSchema("summsrs", "double", "sum measures"), "Sum Msrs",
- "(1000 + sum(msr1) + sum(msr2))/100"));
- exprs.add(new ExprColumn(new FieldSchema("msr5", "double", "materialized in some facts"), "Fifth Msr",
- "msr2 + msr3"));
- exprs.add(new ExprColumn(new FieldSchema("msr8", "double", "measure expression"), "Sixth Msr",
- "msr2 + msr3"));
- exprs.add(new ExprColumn(new FieldSchema("msr7", "double", "measure expression"), "Seventh Msr",
- "case when sum(msr2) = 0 then 0 else sum(case when cityid='x' then msr21 else msr22 end)/sum(msr2) end"));
- exprs.add(new ExprColumn(new FieldSchema("equalsums", "double", "sums are equals"), "equalsums",
- new ExprSpec("msr3 + msr4", null, null), new ExprSpec("(msr3 + msr2)/100", null, null)));
- exprs.add(new ExprColumn(new FieldSchema("roundedmsr1", "double", "rounded measure1"), "Rounded msr1",
- "round(msr1/1000)"));
- exprs.add(new ExprColumn(new FieldSchema("roundedmsr2", "double", "rounded measure2"), "Rounded msr2",
- "round(msr2/1000)"));
- exprs.add(new ExprColumn(new FieldSchema("flooredmsr12", "double", "floored measure12"), "Floored msr12",
- "floor(msr12)"));
- exprs.add(new ExprColumn(new FieldSchema("nestedexpr", "double", "nested expr"), "Nested expr",
- new ExprSpec("avg(roundedmsr2)", null, null), new ExprSpec("avg(equalsums)", null, null),
- new ExprSpec("case when substrexpr = 'xyz' then avg(msr5) when substrexpr = 'abc' then avg(msr4)/100 end",
- null, null)));
- exprs.add(new ExprColumn(new FieldSchema("msr2expr", "double", "nested expr"), "Nested expr",
- new ExprSpec("case when cityStateName = 'xyz' then msr2 else 0 end", null, null)));
- exprs.add(new ExprColumn(new FieldSchema("nestedExprWithTimes", "double", "nested expr"), "Nested expr",
- new ExprSpec("avg(roundedmsr2)", null, null), new ExprSpec("avg(equalsums)", null, null),
- new ExprSpec("case when substrexpr = 'xyz' then avg(msr5) when substrexpr = 'abc' then avg(msr4)/100 end",
- NOW, null), new ExprSpec("avg(newmeasure)", null, null)));
- exprs.add(new ExprColumn(new FieldSchema("msr6", "bigint", "sixth measure"), "Measure6",
- "sum(msr2) + max(msr3)/ count(msr4)"));
- exprs.add(new ExprColumn(new FieldSchema("booleancut", "boolean", "a boolean expression"), "Boolean cut",
- "(dim1 != 'x' AND dim2 != 10)"));
- exprs.add(new ExprColumn(new FieldSchema("substrexpr", "string", "a sub-string expression"), "Substr expr",
- new ExprSpec("substr(dim1, 3))", null, null), new ExprSpec("substr(ascii(dim2chain.name), 3)", null, null)));
- exprs.add(new ExprColumn(new FieldSchema("substrexprdim2", "string", "a sub-string expression"), "Substr expr",
- new ExprSpec("substr(dim2, 3))", null, null), new ExprSpec("substr(ascii(dim2chain.name), 3)", null, null)));
- exprs.add(new ExprColumn(new FieldSchema("indiasubstr", "boolean", "nested sub string expression"), "Nested expr",
- "substrexpr = 'INDIA'"));
- exprs.add(new ExprColumn(new FieldSchema("refexpr", "string", "expression which facts and dimensions"),
- "Expr with cube and dim fields", "concat(dim1, \":\", citydim.name)"));
- exprs.add(new ExprColumn(new FieldSchema("nocolexpr", "string", "expression which non existing colun"),
- "No col expr", "myfun(nonexist)"));
- exprs.add(new ExprColumn(new FieldSchema("newexpr", "string", "expression which non existing colun"),
- "new measure expr", "myfun(newmeasure)"));
- exprs.add(new ExprColumn(new FieldSchema("cityAndState", "String", "city and state together"), "City and State",
- new ExprSpec("concat(cityname, \":\", statename_cube)", null, null),
- new ExprSpec("substr(concatedcitystate, 10)", null, null)));
- exprs.add(new ExprColumn(new FieldSchema("cityAndStateNew", "String", "city and state together"), "City and State",
- new ExprSpec("concat(cityname, \":\", statename_cube)", null, TWO_MONTHS_BACK),
- new ExprSpec("substr(concatedcitystate, 10)", null, null)));
- exprs.add(new ExprColumn(new FieldSchema("cityStateName", "String", "city state"), "City State",
- "concat('CityState:', cubecity.statename)"));
- exprs.add(new ExprColumn(new FieldSchema("isIndia", "String", "is indian city/state"), "Is Indian City/state",
- "cubecity.name == 'DELHI' OR cubestate.name == 'KARNATAKA' OR cubestate.name == 'MAHARASHTRA'"));
- exprs.add(new ExprColumn(new FieldSchema("cubeStateName", "String", "statename from cubestate"), "CubeState Name",
- "substr(cubestate.name, 5)"));
- exprs.add(new ExprColumn(new FieldSchema("substrdim2big1", "String", "substr of dim2big1"), "dim2big1 substr",
- "substr(dim2big1, 5)"));
- exprs.add(new ExprColumn(new FieldSchema("asciicity", "String", "ascii cityname"), "ascii cityname substr",
- "ascii(cityname)"));
- exprs.add(new ExprColumn(new FieldSchema("countofdistinctcityid", "int", "Count of Distinct CityId"),
- "Count of Distinct CityId Expr", "count(distinct(cityid))"));
- exprs.add(new ExprColumn(new FieldSchema("notnullcityid", "int", "Not null cityid"),
- "Not null cityid Expr", "case when cityid is null then 0 else cityid end"));
- // union join context
- exprs.add(new ExprColumn(new FieldSchema(prefix + "notnullcityid", "int", prefix + "Not null cityid"),
- prefix + "Not null cityid Expr", "case when union_join_ctx_cityid is null then 0 "
- + "else union_join_ctx_cityid end"));
- exprs.add(new ExprColumn(new FieldSchema(prefix + "sum_msr1_msr2", "int", prefix + "sum of msr1 and msr2"),
- prefix + "sum of msr1 and msr2", "sum(union_join_ctx_msr1) + sum(union_join_ctx_msr2)"));
- exprs.add(new ExprColumn(new FieldSchema(prefix + "msr1_greater_than_100", "int", prefix + "msr1 greater than 100"),
- prefix + "msr1 greater than 100", "case when sum(union_join_ctx_msr1) > 100 then \"high\" else \"low\" end"));
- exprs.add(new ExprColumn(new FieldSchema(prefix + "non_zero_msr2_sum", "int", prefix + "non zero msr2 sum"),
- prefix + "non zero msr2 sum", "sum(case when union_join_ctx_msr2 > 0 then union_join_ctx_msr2 else 0 end)"));
-
- Map<String, String> cubeProperties = new HashMap<String, String>();
- cubeProperties.put(MetastoreUtil.getCubeTimedDimensionListKey(TEST_CUBE_NAME),
- "d_time,pt,it,et,test_time_dim,test_time_dim2");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "test_time_dim", "ttd");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "test_time_dim2", "ttd2");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "d_time", "dt");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "it", "it");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "et", "et");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "pt", "pt");
- cubeProperties.put(MetastoreConstants.TIMEDIM_RELATION + "d_time", "test_time_dim+[-10 days,10 days]");
-
- client.createCube(TEST_CUBE_NAME, cubeMeasures, cubeDimensions, exprs, Sets.newHashSet(joinChains.values()),
- cubeProperties);
-
- Set<String> measures = new HashSet<String>();
- measures.add("msr1");
- measures.add("msr2");
- measures.add("msr3");
- measures.add("msr9");
- Set<String> dimensions = new HashSet<String>();
- dimensions.add("dim1");
- dimensions.add("dim2");
- dimensions.add("dim2big1");
- dimensions.add("dim2big2");
- dimensions.add("dim2bignew");
- // Try creating derived cube with non existant dim/measures
- try{
- client.createDerivedCube(TEST_CUBE_NAME, DERIVED_CUBE_NAME,
- Sets.newHashSet("random_measure"), Sets.newHashSet("random_dim_attribute"),
- new HashMap<String, String>(), 5L);
- } catch(LensException e) {
- assertTrue(e.getMessage().contains("random_measure"));
- assertTrue(e.getMessage().contains("random_dim_attribute"));
- assertTrue(e.getMessage().contains("not present"));
- }
- client.createDerivedCube(TEST_CUBE_NAME, DERIVED_CUBE_NAME,
- measures, dimensions, new HashMap<String, String>(), 5L);
- }
-
- private void addCubeChains(Map<String, JoinChain> joinChains, final String cubeName) {
- final String prefix = "union_join_ctx_";
- joinChains.put("timehourchain1", new JoinChain("timehourchain1", "time chain", "time dim thru hour dim") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "test_time_dim_hour_id"));
- add(new TableReference("hourdim", "id"));
- }
- });
- }
- });
- joinChains.put("timedatechain1", new JoinChain("timedatechain1", "time chain", "time dim thru date dim") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "test_time_dim_day_id"));
- add(new TableReference("daydim", "id"));
- }
- });
- }
- });
- joinChains.put("timehourchain2", new JoinChain("timehourchain2", "time chain", "time dim thru hour dim") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "test_time_dim_hour_id2"));
- add(new TableReference("hourdim", "id"));
- }
- });
- }
- });
- joinChains.put("timedatechain2", new JoinChain("timedatechain2", "time chain", "time dim thru date dim") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "test_time_dim_day_id2"));
- add(new TableReference("daydim", "id"));
- }
- });
- }
- });
- joinChains.put("cubeCity", new JoinChain("cubeCity", "cube-city", "city thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "cityid"));
- add(new TableReference("citydim", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2"));
- add(new TableReference("testdim2", "id"));
- add(new TableReference("testdim2", "cityid"));
- add(new TableReference("citydim", "id"));
- }
- });
- }
- });
- joinChains.put("cubeCityJoinUnionCtx", new JoinChain("cubeCityJoinUnionCtx", "cube-city", "city thru cube") {
- {
- // added for testing union join context
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, prefix + "cityid"));
- add(new TableReference("citydim", "id"));
- }
- });
- }
- });
- joinChains.put("cubeCity1", new JoinChain("cubeCity1", "cube-city", "city thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "cityid1"));
- add(new TableReference("citydim", "id"));
- }
- });
- }
- });
- joinChains.put("cubeCity2", new JoinChain("cubeCity2", "cube-city", "city thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "cityid2"));
- add(new TableReference("citydim", "id"));
- }
- });
- }
- });
- joinChains.put("cubeState", new JoinChain("cubeState", "cube-state", "state thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "stateid"));
- add(new TableReference("statedim", "id"));
- }
- });
- }
- });
- joinChains.put("cubeZip", new JoinChain("cubeZipJoinUnionCtx", "cube-zip", "Zipcode thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, prefix + "zipcode"));
- add(new TableReference("zipdim", "code"));
- }
- });
- }
- });
- joinChains.put("cubeZip", new JoinChain("cubeZip", "cube-zip", "Zipcode thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "zipcode"));
- add(new TableReference("zipdim", "code"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, prefix + "zipcode"));
- add(new TableReference("zipdim", "code"));
- }
- });
- }
- });
- joinChains.put("cubeCountry", new JoinChain("cubeCountry", "cube-country", "country thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "countryid"));
- add(new TableReference("countrydim", "id"));
- }
- });
- }
- });
- joinChains.put("dim2chain", new JoinChain("dim2chain", "cube-testdim2", "testdim2 thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2"));
- add(new TableReference("testdim2", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2big1"));
- add(new TableReference("testdim2", "bigid1"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2big2"));
- add(new TableReference("testdim2", "bigid2"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2bignew"));
- add(new TableReference("testdim2", "bigidnew"));
- }
- });
- }
- });
- joinChains.put("dim3chain", new JoinChain("dim3chain", "cube-testdim3", "cyclicdim thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2"));
- add(new TableReference("testdim2", "id"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2big1"));
- add(new TableReference("testdim2", "bigid1"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2big2"));
- add(new TableReference("testdim2", "bigid2"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2bignew"));
- add(new TableReference("testdim2", "bigidnew"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "testdim3id"));
- add(new TableReference("testdim3", "id"));
- }
- });
- }
- });
- joinChains.put("dim4chain", new JoinChain("dim4chain", "cube-testdim3", "cyclicdim thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2"));
- add(new TableReference("testdim2", "id"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- add(new TableReference("testdim3", "testdim4id"));
- add(new TableReference("testdim4", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2big1"));
- add(new TableReference("testdim2", "bigid1"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- add(new TableReference("testdim3", "testdim4id"));
- add(new TableReference("testdim4", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2big2"));
- add(new TableReference("testdim2", "bigid2"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- add(new TableReference("testdim3", "testdim4id"));
- add(new TableReference("testdim4", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "dim2bignew"));
- add(new TableReference("testdim2", "bigidnew"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- add(new TableReference("testdim3", "testdim4id"));
- add(new TableReference("testdim4", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "testdim3id"));
- add(new TableReference("testdim3", "id"));
- add(new TableReference("testdim3", "testdim4id"));
- add(new TableReference("testdim4", "id"));
- }
- });
- }
- });
- joinChains.put("cdimChain", new JoinChain("cdimChain", "cube-cyclicdim", "cyclicdim thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "cdim2"));
- add(new TableReference("cycledim1", "id"));
- }
- });
- }
- });
- joinChains.put("unreachableDim_chain", new JoinChain("unreachableDim_chain", "cube-unreachableDim",
- "unreachableDim thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "urdimid"));
- add(new TableReference("unreachableDim", "id"));
- }
- });
- }
- });
- joinChains.put("cubeCountry", new JoinChain("cubeCountry", "cube-country", "country thru cube") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference(cubeName, "countryid"));
- add(new TableReference("countrydim", "id"));
- }
- });
- }
- });
- }
- private void createBaseAndDerivedCubes(CubeMetastoreClient client)
- throws HiveException, ParseException, LensException {
- Set<CubeMeasure> cubeMeasures2 = new HashSet<>(cubeMeasures);
- Set<CubeDimAttribute> cubeDimensions2 = new HashSet<>(cubeDimensions);
- cubeMeasures2.add(new ColumnMeasure(new FieldSchema("msr11", "int", "first measure")));
- cubeMeasures2.add(new ColumnMeasure(new FieldSchema("msr12", "float", "second measure"), "Measure2", null, "SUM",
- "RS"));
- cubeMeasures2.add(new ColumnMeasure(new FieldSchema("msr13", "double", "third measure"), "Measure3", null, "MAX",
- null));
- cubeMeasures2.add(new ColumnMeasure(new FieldSchema("msr14", "bigint", "fourth measure"), "Measure4", null,
- "COUNT", null));
- cubeMeasures2.add(new ColumnMeasure(new FieldSchema("directMsr", "bigint", "fifth measure"), "Direct Measure",
- null, "SUM", null));
-
- cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("dim11", "string", "basedim")));
- cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("dim12", "int", "ref dim"), "Dim2 refer",
- "dim2chain", "id", null, null, null)); // used as key in the chains
- cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("dim22", "int", "ref dim"), "Dim2 refer",
- "dim2chain", "id", null, null, null)); // not used as key in the chains
- cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("dim13", "string", "basedim")));
- cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("userid", "int", "userid")));
- cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("xuserid", "int", "userid")));
- cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("yuserid", "int", "userid")));
- cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("user_id_added_in_past", "int", "user_id_added_in_past")));
- cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("user_id_added_far_future", "int",
- "user_id_added_far_future")));
- cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("user_id_deprecated", "int", "user_id_deprecated")));
-
- cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("xsports", "array<string>", ""),
- "xuser sports", "xusersports", "name", null, null, null));
- cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("ysports", "array<string>", ""),
- "yuser sports", "yusersports", "name", null, null, null));
- cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("sports", "array<string>", ""),
- "user sports", "usersports", "name", null, null, null));
- cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("sportids", "array<int>", ""),
- "user sports", "userInterestIds", "sport_id", null, null, null));
- cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("statecountry", "string", ""),
- "state country", "cubestatecountry", "name", null, null, null));
- cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("citycountry", "string", ""),
- "city country", "cubecitystatecountry", "name", null, null, null));
- List<ChainRefCol> refCols = new ArrayList<>();
- refCols.add(new ChainRefCol("cubeState", "countrycapital"));
- refCols.add(new ChainRefCol("cubeCityStateCountry", "capital"));
- cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("cubeCountryCapital", "String", "ref dim"),
- "Country capital", refCols, null, null, null, null));
- Map<String, String> cubeProperties = new HashMap<>();
- cubeProperties.put(MetastoreUtil.getCubeTimedDimensionListKey(BASE_CUBE_NAME),
- "d_time,pt,it,et,test_time_dim,test_time_dim2");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "test_time_dim", "ttd");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "test_time_dim2", "ttd2");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "d_time", "dt");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "it", "it");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "et", "et");
- cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "processing_time", "pt");
- cubeProperties.put(MetastoreConstants.TIMEDIM_RELATION + "d_time", "processing_time+[-5 days,5 days]");
- cubeProperties.put(MetastoreConstants.TIMEDIM_RELATION + "processing_time", "test_time_dim+[-5 days,5 days]");
- cubeProperties.put(MetastoreConstants.CUBE_ALL_FIELDS_QUERIABLE, "false");
-
- Map<String, JoinChain> joinChainMap = new HashMap<>();
- addCubeChains(joinChainMap, "basecube");
- // update new paths
- joinChainMap.get("dim2chain").addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "dim12"));
- add(new TableReference("testdim2", "id"));
- }
- });
- joinChainMap.get("dim3chain").addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "dim12"));
- add(new TableReference("testdim2", "id"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- }
- });
- joinChainMap.get("dim4chain").addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "dim12"));
- add(new TableReference("testdim2", "id"));
- add(new TableReference("testdim2", "testdim3id"));
- add(new TableReference("testdim3", "id"));
- add(new TableReference("testdim3", "testdim4id"));
- add(new TableReference("testdim4", "id"));
- }
- });
- Set<JoinChain> joinChains = Sets.newHashSet(joinChainMap.values());
- joinChains.add(new JoinChain("cityState", "city-state", "state thru city") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "cityid"));
- add(new TableReference("citydim", "id"));
- add(new TableReference("citydim", "stateid"));
- add(new TableReference("statedim", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "cityid"));
- add(new TableReference("citydim", "id"));
- add(new TableReference("citydim", "statename"));
- add(new TableReference("statedim", "name"));
- }
- });
- }
- });
- joinChains.add(new JoinChain("cityZip", "city-zip", "zip thru city") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "cityid"));
- add(new TableReference("citydim", "id"));
- add(new TableReference("citydim", "zipcode"));
- add(new TableReference("zipdim", "code"));
- }
- });
- }
- });
- joinChains.add(new JoinChain("cubeStateCountry", "cube-state-country", "country through state") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "stateid"));
- add(new TableReference("statedim", "id"));
- add(new TableReference("statedim", "countryid"));
- add(new TableReference("countrydim", "id"));
- }
- });
- }
- });
- joinChains.add(new JoinChain("cubeCityStateCountry", "cube-city-state-country", "country through state thru city") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "cityid"));
- add(new TableReference("citydim", "id"));
- add(new TableReference("citydim", "stateid"));
- add(new TableReference("statedim", "id"));
- add(new TableReference("statedim", "countryid"));
- add(new TableReference("countrydim", "id"));
- }
- });
- }
- });
- joinChains.add(new JoinChain("userchain", "user-chain", "user chain") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "userid"));
- add(new TableReference("userdim", "id"));
- }
- });
- }
- });
- joinChains.add(new JoinChain("user_id_added_far_future_chain", "user_id_added_far_future_chain",
- "user_id_added_far_future_chain") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "user_id_added_far_future"));
- add(new TableReference("userdim", "user_id_added_far_future"));
- }
- });
- }
- });
- joinChains.add(new JoinChain("userSports", "user-sports", "user sports") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "userid"));
- add(new TableReference("userdim", "id"));
- add(new TableReference("userdim", "id"));
- add(new TableReference("user_interests", "user_id", true));
- add(new TableReference("user_interests", "sport_id"));
- add(new TableReference("sports", "id"));
- }
- });
- }
- });
- joinChains.add(new JoinChain("userInterestIds", "user-interestsIds", "user interest ids") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "userid"));
- add(new TableReference("userdim", "id"));
- add(new TableReference("userdim", "id"));
- add(new TableReference("user_interests", "user_id", true));
- }
- });
- }
- });
- joinChains.add(new JoinChain("xuserSports", "xuser-sports", "xuser sports") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "xuserid"));
- add(new TableReference("userdim", "id"));
- add(new TableReference("userdim", "id"));
- add(new TableReference("user_interests", "user_id", true));
- add(new TableReference("user_interests", "sport_id"));
- add(new TableReference("sports", "id"));
- }
- });
- }
- });
- joinChains.add(new JoinChain("yuserSports", "user-sports", "user sports") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("basecube", "yuserid"));
- add(new TableReference("userdim", "id"));
- add(new TableReference("userdim", "id"));
- add(new TableReference("user_interests", "user_id", true));
- add(new TableReference("user_interests", "sport_id"));
- add(new TableReference("sports", "id"));
- }
- });
- }
- });
-
- // add ref dim through chain
- cubeDimensions2.add(
- new ReferencedDimAttribute(new FieldSchema("cityStateCapital", "string", "State's capital thru city"),
- "State's capital thru city", "cityState", "capital", null, null, null));
- Set<ExprColumn> baseExprs = new HashSet<>(exprs);
- baseExprs.add(new ExprColumn(new FieldSchema("substrsprorts", "String", "substr of sports"), "substr sports",
- "substr(sports, 10)"));
- baseExprs.add(new ExprColumn(new FieldSchema("xsports_abbr", "array<string>", ""),
- "xuser sports", "substr(xsports, 3)"));
- baseExprs.add(new ExprColumn(new FieldSchema("ysports_abbr", "array<string>", ""),
- "yuser sports", "substr(ysports, 3)"));
- baseExprs.add(new ExprColumn(new FieldSchema("sports_abbr", "array<string>", ""),
- "user sports", "substr(sports, 3)"));
- baseExprs.add(new ExprColumn(new FieldSchema("sportids_abbr", "array<string>", ""),
- "user sports", "case when sportids == 1 then 'CKT' when sportids == 2 then 'FTB' else 'NON' end"));
- baseExprs.add(new ExprColumn(new FieldSchema("directMsrExpr", "bigint", ""),
- "Direct Measure", new ExprSpec("directMsr + 0", null, null), new ExprSpec("msr13 + msr14", null, null)));
- client.createCube(BASE_CUBE_NAME, cubeMeasures2, cubeDimensions2, baseExprs, joinChains, cubeProperties);
-
- Map<String, String> derivedProperties = new HashMap<>();
- derivedProperties.put(MetastoreConstants.CUBE_ALL_FIELDS_QUERIABLE, "true");
- Set<String> measures = new HashSet<>();
- measures.add("msr1");
- measures.add("msr9");
- measures.add("msr11");
- Set<String> dimensions = new HashSet<>();
- dimensions.add("dim1");
- dimensions.add("dim11");
- dimensions.add("d_time");
- client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME1, measures, dimensions, derivedProperties, 5L);
-
- measures = new HashSet<>();
- measures.add("msr2");
- measures.add("msr12");
- measures.add("msr13");
- measures.add("msr14");
- measures.add("directmsr");
- dimensions = new HashSet<>();
- dimensions.add("cityid");
- dimensions.add("stateid");
- dimensions.add("userid");
- dimensions.add("xuserid");
- dimensions.add("yuserid");
- dimensions.add("dim1");
- dimensions.add("dim2");
- dimensions.add("dim2big1");
- dimensions.add("dim2big2");
- dimensions.add("dim2bignew");
- dimensions.add("dim11");
- dimensions.add("dim13");
- dimensions.add("dim12");
- dimensions.add("dim22");
- dimensions.add("d_time");
- dimensions.add("test_time_dim");
- dimensions.add("test_time_dim2");
- dimensions.add("test_time_dim_hour_id");
- dimensions.add("test_time_dim_day_id");
- dimensions.add("test_time_dim_hour_id2");
- dimensions.add("test_time_dim_day_id2");
- client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME2, measures, dimensions, derivedProperties, 10L);
- measures = new HashSet<>();
- measures.add("msr3");
- measures.add("msr13");
- dimensions = new HashSet<>();
- dimensions.add("dim1");
- dimensions.add("location");
- dimensions.add("d_time");
- dimensions.add("test_time_dim");
- dimensions.add("test_time_dim2");
- dimensions.add("test_time_dim_hour_id");
- dimensions.add("test_time_dim_day_id");
- dimensions.add("test_time_dim_hour_id2");
- dimensions.add("test_time_dim_day_id2");
- client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME3, measures, dimensions, derivedProperties, 20L);
-
- // create base cube facts
- createBaseCubeFacts(client);
- // create join and union ctx facts
- createUnionAndJoinContextFacts(client);
- }
-
- private void createUnionAndJoinContextFacts(CubeMetastoreClient client) throws HiveException, LensException {
- String prefix = "union_join_ctx_";
- String derivedCubeName = prefix + "der1";
- Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
- updates.add(DAILY);
-
- ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
- List<String> timePartCols = new ArrayList<String>();
- partCols.add(TestCubeMetastoreClient.getDatePartition());
- timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
-
- StorageTableDesc s1 = new StorageTableDesc();
- s1.setInputFormat(TextInputFormat.class.getCanonicalName());
- s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s1.setPartCols(partCols);
- s1.setTimePartCols(timePartCols);
-
- storageAggregatePeriods.put(c1, updates);
-
- Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
-
- // create fact1 (all dim attributes only msr1)
- String factName = prefix + "fact1";
- List<FieldSchema> factColumns = new ArrayList<FieldSchema>();
- factColumns.add(new ColumnMeasure(new FieldSchema(prefix + "msr1", "int", "first measure")).getColumn());
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema(prefix + "zipcode", "int", "zip"));
- factColumns.add(new FieldSchema(prefix + "cityid", "int", "city id"));
- // add fact start and end time property
- Map<String, String> properties = Maps.newHashMap(factValidityProperties);
- properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
- properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day - 90 days"));
- properties.put(MetastoreConstants.FACT_ABSOLUTE_END_TIME, DateUtil.relativeToAbsolute("now.day - 30 days"));
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, properties,
- storageTables);
-
- // create fact2 with same schema, but it starts after fact1 ends
- factName = prefix + "fact2";
- properties.clear();
- //factColumns.add(new ColumnMeasure(new FieldSchema(prefix + "msr2", "int", "second measure")).getColumn());
- // add fact start and end time property
- properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
- properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day - 31 days"));
- properties.put(MetastoreConstants.FACT_ABSOLUTE_END_TIME, DateUtil.relativeToAbsolute("now.day + 7 days"));
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, properties,
- storageTables);
-
- // create fact3 (all dim attributes only msr2)
- factName = prefix + "fact3";
- factColumns.clear();
- factColumns.add(new ColumnMeasure(new FieldSchema(prefix + "msr2", "int", "second measure")).getColumn());
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema(prefix + "zipcode", "int", "zip"));
- factColumns.add(new FieldSchema(prefix + "cityid", "int", "city id"));
- properties.clear();
- // add fact start and end time property
- properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
- properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day - 90 days"));
- properties.put(MetastoreConstants.FACT_ABSOLUTE_END_TIME, DateUtil.relativeToAbsolute("now.day + 7 days"));
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, properties,
- storageTables);
-
- /*
- // create fact4 will all all measures and entire timerange covered
- factName = prefix + "fact4";
- factColumns.add(new ColumnMeasure(new FieldSchema(prefix + "msr1", "int", "first measure")).getColumn());
- properties.clear();
- properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
- properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day - 90 days"));
- properties.put(MetastoreConstants.FACT_ABSOLUTE_END_TIME, DateUtil.relativeToAbsolute("now.day + 7 days"));
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
- properties, storageTables);
- */
- // create fact5 and fact6 with msr3 and covering timerange as set
- factName = prefix + "fact5";
- factColumns.clear();
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema(prefix + "zipcode", "int", "zip"));
- factColumns.add(new FieldSchema(prefix + "cityid", "int", "city id"));
- factColumns.add(new ColumnMeasure(new FieldSchema(prefix + "msr3", "int", "third measure")).getColumn());
- properties.clear();
- properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
- properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day - 90 days"));
- properties.put(MetastoreConstants.FACT_ABSOLUTE_END_TIME, DateUtil.relativeToAbsolute("now.day -30 days"));
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, properties,
- storageTables);
-
- factName = prefix + "fact6";
- properties.clear();
- properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
- properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day -31 days"));
- properties.put(MetastoreConstants.FACT_ABSOLUTE_END_TIME, DateUtil.relativeToAbsolute("now.day + 7 days"));
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, properties,
- storageTables);
-
- // Create derived cube
- Map<String, String> derivedProperties = new HashMap<>();
- derivedProperties.put(MetastoreConstants.CUBE_ALL_FIELDS_QUERIABLE, "true");
- Set<String> measures = new HashSet<>();
- measures.add(prefix + "msr1");
- measures.add(prefix + "msr2");
- measures.add(prefix + "msr3");
- Set<String> dimensions = new HashSet<>();
- dimensions.add(prefix + "cityid");
- dimensions.add(prefix + "zipcode");
- dimensions.add("d_time");
- dimensions.add(prefix + "cityname");
- client.createDerivedCube(BASE_CUBE_NAME, derivedCubeName, measures, dimensions, derivedProperties, 5L);
-
- }
-
- private void createBaseCubeFacts(CubeMetastoreClient client) throws HiveException, LensException {
-
- Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
- updates.add(MINUTELY);
- updates.add(HOURLY);
- updates.add(DAILY);
- updates.add(MONTHLY);
- updates.add(QUARTERLY);
- updates.add(YEARLY);
-
- ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
- List<String> timePartCols = new ArrayList<String>();
- partCols.add(TestCubeMetastoreClient.getDatePartition());
- timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
-
- StorageTableDesc s1 = new StorageTableDesc();
- s1.setInputFormat(TextInputFormat.class.getCanonicalName());
- s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s1.setPartCols(partCols);
- s1.setTimePartCols(timePartCols);
-
- StorageTableDesc s2 = new StorageTableDesc();
- s2.setInputFormat(TextInputFormat.class.getCanonicalName());
- s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>();
- s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition"));
- s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, "test date partition"));
- s2.setPartCols(s2PartCols);
- s2.setTimePartCols(Arrays.asList("ttd", "ttd2"));
-
- storageAggregatePeriods.put(c1, updates);
- storageAggregatePeriods.put(c2, updates);
- storageAggregatePeriods.put(c3, updates);
- storageAggregatePeriods.put(c4, updates);
-
- Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
- storageTables.put(c4, s2);
- storageTables.put(c2, s1);
- storageTables.put(c3, s1);
-
- String factName = "testFact1_BASE";
- List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size());
- for (CubeMeasure measure : cubeMeasures) {
- factColumns.add(measure.getColumn());
- }
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("zipcode", "int", "zip"));
- factColumns.add(new FieldSchema("cityid", "int", "city id"));
- factColumns.add(new FieldSchema("stateid", "int", "state id"));
- factColumns.add(new FieldSchema("userid", "int", "user id"));
- factColumns.add(new FieldSchema("xuserid", "int", "user id"));
- factColumns.add(new FieldSchema("yuserid", "int", "user id"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
- factColumns.add(new FieldSchema("dim11", "string", "base dim"));
- factColumns.add(new FieldSchema("test_time_dim_hour_id", "int", "time id"));
-
- // create cube fact with materialized expressions
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
- factValidityProperties, storageTables);
-
- factName = "testFact5_BASE";
- factColumns = new ArrayList<>(cubeMeasures.size());
- for (CubeMeasure measure : cubeMeasures) {
- factColumns.add(measure.getColumn());
- }
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
- factColumns.add(new FieldSchema("booleancut", "boolean", "expr dim"));
-
- // create cube fact
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 150L,
- factValidityProperties, storageTables);
-
- // create fact only with extra measures
- factName = "testFact2_BASE";
- factColumns = new ArrayList<FieldSchema>();
- factColumns.add(new FieldSchema("msr12", "float", "second measure"));
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
- factColumns.add(new FieldSchema("dim11", "string", "base dim"));
- factColumns.add(new FieldSchema("dim2", "int", "dim2 id"));
- factColumns.add(new FieldSchema("userid", "int", "user id"));
- factColumns.add(new FieldSchema("xuserid", "int", "user id"));
- factColumns.add(new FieldSchema("yuserid", "int", "user id"));
- // create cube fact
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
- factValidityProperties, storageTables);
- Map<String, String> properties = Maps.newHashMap(factValidityProperties);
- properties.put(MetastoreConstants.FACT_ABSOLUTE_END_TIME, DateUtil.relativeToAbsolute("now.day - 2 days"));
- properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day - 3 days"));
- client.createCubeFactTable(BASE_CUBE_NAME, "testfact_deprecated", factColumns, storageAggregatePeriods, 5L,
- properties, storageTables);
-
- // create fact only with extra measures
- factName = "testFact3_BASE";
- factColumns = new ArrayList<FieldSchema>();
- factColumns.add(new FieldSchema("msr13", "double", "third measure"));
- factColumns.add(new FieldSchema("msr14", "bigint", "fourth measure"));
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
- factColumns.add(new FieldSchema("dim11", "string", "base dim"));
-
- // create cube fact
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
- factValidityProperties, storageTables);
-
- // create fact with materialized expression
- factName = "testFact6_BASE";
- factColumns = new ArrayList<>();
- factColumns.add(new FieldSchema("msr13", "double", "third measure"));
- factColumns.add(new FieldSchema("msr14", "bigint", "fourth measure"));
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
- factColumns.add(new FieldSchema("booleancut", "boolean", "expr dim"));
-
- // create cube fact
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 150L,
- factValidityProperties, storageTables);
-
- // create raw fact only with extra measures
- factName = "testFact2_RAW_BASE";
- factColumns = new ArrayList<FieldSchema>();
- factColumns.add(new FieldSchema("msr11", "int", "first measure"));
- factColumns.add(new FieldSchema("msr12", "float", "second measure"));
- factColumns.add(new FieldSchema("msr9", "bigint", "ninth measure"));
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
- factColumns.add(new FieldSchema("dim11", "string", "base dim"));
- factColumns.add(new FieldSchema("dim13", "string", "base dim"));
- factColumns.add(new FieldSchema("dim12", "string", "base dim"));
- factColumns.add(new FieldSchema("dim22", "string", "base dim"));
- factColumns.add(new FieldSchema("cityid", "int", "city id"));
-
- storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- updates = new HashSet<UpdatePeriod>();
- updates.add(HOURLY);
- storageAggregatePeriods.put(c1, updates);
-
- storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
-
- // create cube fact
- properties.clear();
- properties.putAll(factValidityProperties);
- properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
- properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f2");
-
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
- storageTables);
-
- // create raw fact only with extra measures
- factName = "testFact3_RAW_BASE";
- factColumns = new ArrayList<FieldSchema>();
- factColumns.add(new FieldSchema("msr13", "double", "third measure"));
- factColumns.add(new FieldSchema("msr14", "bigint", "fourth measure"));
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
- factColumns.add(new FieldSchema("dim11", "string", "base dim"));
- factColumns.add(new FieldSchema("dim12", "string", "base dim"));
-
- storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- updates = new HashSet<UpdatePeriod>();
- updates.add(HOURLY);
- storageAggregatePeriods.put(c1, updates);
-
- storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
- properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_in_past"), "2016-01-01");
- properties.put(MetastoreConstants.FACT_COL_END_TIME_PFX.concat("user_id_deprecated"), "2016-01-01");
- properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_far_future"), "2099-01-01");
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
- storageTables);
-
- factName = "testFact4_RAW_BASE";
- factColumns = new ArrayList<FieldSchema>();
- factColumns.add(new FieldSchema("msr13", "double", "third measure"));
- factColumns.add(new FieldSchema("msr14", "bigint", "fourth measure"));
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
- factColumns.add(new FieldSchema("user_id_added_in_past", "int", "user id"));
- factColumns.add(new FieldSchema("user_id_added_far_future", "int", "user id"));
- factColumns.add(new FieldSchema("user_id_deprecated", "int", "user id"));
-
- storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
- properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_in_past"), "2016-01-01");
- properties.put(MetastoreConstants.FACT_COL_END_TIME_PFX.concat("user_id_deprecated"), "2016-01-01");
- properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_far_future"), "2099-01-01");
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
- storageTables);
-
- factName = "testFact5_RAW_BASE";
- factColumns = new ArrayList<FieldSchema>();
- factColumns.add(new FieldSchema("msr9", "bigint", "ninth measure"));
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
-
- properties.clear();
- properties.putAll(factValidityProperties);
- properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
- properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f2");
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
- storageTables);
-
- CubeFactTable fact = client.getFactTable(factName);
- // Add all hourly partitions for two days
- Calendar cal = Calendar.getInstance();
- cal.setTime(TWODAYS_BACK);
- Date temp = cal.getTime();
- while (!(temp.after(NOW))) {
- Map<String, Date> timeParts = new HashMap<String, Date>();
- timeParts.put("dt", temp);
- StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY);
- client.addPartition(sPartSpec, c1, CubeTableType.FACT);
- cal.add(HOUR_OF_DAY, 1);
- temp = cal.getTime();
- }
- }
-
- private void createCubeContinuousFact(CubeMetastoreClient client) throws Exception {
- // create continuous raw fact only with extra measures
- String factName = "testFact_CONTINUOUS";
- List<FieldSchema> factColumns = new ArrayList<FieldSchema>();
- factColumns.add(new FieldSchema("msr11", "double", "third measure"));
- factColumns.add(new FieldSchema("msr15", "int", "fifteenth measure"));
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
- factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
- factColumns.add(new FieldSchema("dim1", "string", "base dim"));
- factColumns.add(new FieldSchema("dim11", "string", "base dim"));
- factColumns.add(new FieldSchema("dim12", "string", "base dim"));
-
- Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
- updates.add(CONTINUOUS);
- storageAggregatePeriods.put(c0, updates);
-
- StorageTableDesc s0 = new StorageTableDesc();
- s0.setInputFormat(TextInputFormat.class.getCanonicalName());
- s0.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
-
- Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c0, s0);
- Map<String, String> properties = Maps.newHashMap(factValidityProperties);
- properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day - 3 days"));
-
- client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
- storageTables);
- }
+ private Set<ExprColumn> exprs;
- private void createCubeFact(CubeMetastoreClient client) throws Exception {
+ private void assertTestFactTimelineClass(CubeMetastoreClient client) throws Exception {
String factName = "testFact";
- List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size());
- for (CubeMeasure measure : cubeMeasures) {
- if (!measure.getColumn().getName().equals("msr15")) { //do not add msr15
- factColumns.add(measure.getColumn());
- }
- }
- factColumns.add(new FieldSchema("msr5", "double", "msr5"));
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("zipcode", "int", "zip"));
- factColumns.add(new FieldSchema("cityid", "int", "city id"));
- factColumns.add(new FieldSchema("cityid1", "int", "city id"));
- factColumns.add(new FieldSchema("stateid", "int", "city id"));
- factColumns.add(new FieldSchema("test_time_dim_day_id", "int", "time id"));
- factColumns.add(new FieldSchema("test_time_dim_day_id2", "int", "time id"));
- factColumns.add(new FieldSchema("ambigdim1", "string", "used in" + " testColumnAmbiguity"));
-
- Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
- updates.add(MINUTELY);
- updates.add(HOURLY);
- updates.add(DAILY);
- updates.add(MONTHLY);
- updates.add(QUARTERLY);
- updates.add(YEARLY);
-
- ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
- List<String> timePartCols = new ArrayList<String>();
- partCols.add(TestCubeMetastoreClient.getDatePartition());
- timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
-
- StorageTableDesc s1 = new StorageTableDesc();
- s1.setInputFormat(TextInputFormat.class.getCanonicalName());
- s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s1.setPartCols(partCols);
- s1.setTimePartCols(timePartCols);
-
- StorageTableDesc s2 = new StorageTableDesc();
- s2.setInputFormat(TextInputFormat.class.getCanonicalName());
- s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>();
- s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition"));
- s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, "test date partition"));
- s2.setPartCols(s2PartCols);
- s2.setTimePartCols(Arrays.asList("ttd", "ttd2"));
-
- StorageTableDesc s3 = new StorageTableDesc();
- s3.setInputFormat(TextInputFormat.class.getCanonicalName());
- s3.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s3.setPartCols(partCols);
- s3.setTimePartCols(timePartCols);
- s3.getTblProps().put(MetastoreUtil.getStoragetableStartTimesKey(), "now.day - 90 days");
- s3.getTblProps().put(MetastoreUtil.getStoragetableEndTimesKey(), "now.day - 10 days");
-
- StorageTableDesc s5 = new StorageTableDesc();
- s5.setInputFormat(TextInputFormat.class.getCanonicalName());
- s5.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s5.setPartCols(partCols);
- s5.setTimePartCols(timePartCols);
- s5.getTblProps().put(MetastoreUtil.getStoragetableStartTimesKey(), "now.day - 10 days");
-
- storageAggregatePeriods.put(c1, updates);
- storageAggregatePeriods.put(c2, updates);
- storageAggregatePeriods.put(c3, updates);
- storageAggregatePeriods.put(c4, updates);
- storageAggregatePeriods.put(c5, updates);
-
- Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
- storageTables.put(c4, s2);
- storageTables.put(c2, s1);
- storageTables.put(c3, s3);
- storageTables.put(c5, s5);
-
- //add storage with continuous update period
- updates.add(CONTINUOUS);
- storageAggregatePeriods.put(c0, updates);
- StorageTableDesc s0 = new StorageTableDesc();
- s0.setInputFormat(TextInputFormat.class.getCanonicalName());
- s0.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- storageTables.put(c0, s0);
-
- // create cube fact
- client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
- factValidityProperties, storageTables);
+
client.getTimelines(factName, c1, null, null);
client.getTimelines(factName, c4, null, null);
+
client.clearHiveTableCache();
+
CubeFactTable fact = client.getFactTable(factName);
Table table = client.getTable(MetastoreUtil.getStorageTableName(fact.getName(), Storage.getPrefix(c1)));
assertEquals(table.getParameters().get(MetastoreUtil.getPartitionTimelineCachePresenceKey()), "true");
@@ -1857,62 +572,8 @@ public class CubeTestSetup {
assertTimeline(client, factName, storageName, updatePeriod, timeDim, expectedTimeline);
}
- private void createCubeCheapFact(CubeMetastoreClient client) throws HiveException, LensException {
+ private void createCubeCheapFactPartitions(CubeMetastoreClient client) throws HiveException, LensException {
String factName = "cheapFact";
- List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size());
- for (CubeMeasure measure : cubeMeasures) {
- factColumns.add(measure.getColumn());
- }
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("zipcode", "int", "zip"));
- factColumns.add(new FieldSchema("cityid", "int", "city id"));
- factColumns.add(new FieldSchema("stateid", "int", "city id"));
- factColumns.add(new FieldSchema("test_time_dim_hour_id", "int", "time id"));
- factColumns.add(new FieldSchema("ambigdim1", "string", "used in" + " testColumnAmbiguity"));
-
- Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
- updates.add(MINUTELY);
- updates.add(HOURLY);
- updates.add(DAILY);
- updates.add(MONTHLY);
- updates.add(QUARTERLY);
- updates.add(YEARLY);
-
- ArrayList<FieldSchema> partCols = new ArrayList<>();
- List<String> timePartCols = new ArrayList<>();
- partCols.add(TestCubeMetastoreClient.getDatePartition());
- timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
-
- StorageTableDesc s1 = new StorageTableDesc();
- s1.setInputFormat(TextInputFormat.class.getCanonicalName());
- s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s1.setPartCols(partCols);
- s1.setTimePartCols(timePartCols);
- s1.setTblProps(new HashMap<String, String>());
- s1.getTblProps().put(MetastoreUtil.getStoragetableStartTimesKey(), "2000, now - 10 years");
- s1.getTblProps().put(MetastoreUtil.getStoragetableEndTimesKey(), "now - 5 years, 2010");
-
- StorageTableDesc s2 = new StorageTableDesc();
- s2.setInputFormat(TextInputFormat.class.getCanonicalName());
- s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- ArrayList<FieldSchema> s2PartCols = new ArrayList<>();
- s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition"));
- s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, "test date partition"));
- s2.setPartCols(s2PartCols);
- s2.setTimePartCols(Arrays.asList("ttd", "ttd2"));
-
- storageAggregatePeriods.put(c99, updates);
- storageAggregatePeriods.put(c0, updates);
-
- Map<String, StorageTableDesc> storageTables = new HashMap<>();
- storageTables.put(c99, s2);
- storageTables.put(c0, s1);
- // create cube fact
- client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L,
- factValidityProperties, storageTables);
-
CubeFactTable fact = client.getFactTable(factName);
// Add all hourly partitions for two days
Calendar cal = Calendar.getInstance();
@@ -1942,88 +603,9 @@ public class CubeTestSetup {
}
}
- private void createCubeFactWeekly(CubeMetastoreClient client) throws Exception {
- String factName = "testFactWeekly";
- List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size());
- for (CubeMeasure measure : cubeMeasures) {
- factColumns.add(measure.getColumn());
- }
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("zipcode", "int", "zip"));
-
- Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
- updates.add(WEEKLY);
- ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
- List<String> timePartCols = new ArrayList<String>();
- partCols.add(TestCubeMetastoreClient.getDatePartition());
- timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
- StorageTableDesc s1 = new StorageTableDesc();
- s1.setInputFormat(TextInputFormat.class.getCanonicalName());
- s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s1.setPartCols(partCols);
- s1.setTimePartCols(timePartCols);
-
- storageAggregatePeriods.put(c1, updates);
-
- Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
- // create cube fact
- client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
- factValidityProperties, storageTables);
- }
- private void createCubeFactOnlyHourly(CubeMetastoreClient client) throws Exception {
+ private void createTestFact2Partitions(CubeMetastoreClient client) throws Exception {
String factName = "testFact2";
- List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size());
- for (CubeMeasure measure : cubeMeasures) {
- if (!measure.getName().equals("msr4")) {
- factColumns.add(measure.getColumn());
- }
- }
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("zipcode", "int", "zip"));
- factColumns.add(new FieldSchema("cityid", "int", "city id"));
- factColumns.add(new FieldSchema("cityid2", "int", "city id"));
- factColumns.add(new FieldSchema("test_time_dim_hour_id", "int", "time id"));
- factColumns.add(new FieldSchema("test_time_dim_hour_id2", "int", "time id"));
- factColumns.add(new FieldSchema("cdim2", "int", "cycledim id"));
-
- Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
- updates.add(HOURLY);
- ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
- List<String> timePartCols = new ArrayList<String>();
- partCols.add(TestCubeMetastoreClient.getDatePartition());
- timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
- StorageTableDesc s1 = new StorageTableDesc();
- s1.setInputFormat(TextInputFormat.class.getCanonicalName());
- s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s1.setPartCols(partCols);
- s1.setTimePartCols(timePartCols);
-
- StorageTableDesc s2 = new StorageTableDesc();
- s2.setInputFormat(TextInputFormat.class.getCanonicalName());
- s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>();
- s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition"));
- s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, "test date partition"));
- s2.setPartCols(s2PartCols);
- s2.setTimePartCols(Arrays.asList("ttd", "ttd2"));
-
- storageAggregatePeriods.put(c1, updates);
- storageAggregatePeriods.put(c4, updates);
-
- Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
- storageTables.put(c4, s2);
-
- // create cube fact
- client
- .createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 10L,
- factValidityProperties, storageTables);
CubeFactTable fact = client.getFactTable(factName);
// Add all hourly partitions for two days
Calendar cal = Calendar.getInstance();
@@ -2110,54 +692,8 @@ public class CubeTestSetup {
}
}
- private void createCubeFactOnlyHourlyRaw(CubeMetastoreClient client) throws HiveException, LensException {
+ private void createTestFact2RawPartitions(CubeMetastoreClient client) throws HiveException, LensException {
String factName = "testFact2_raw";
- String factName2 = "testFact1_raw_BASE";
- List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size());
- for (CubeMeasure measure : cubeMeasures) {
- factColumns.add(measure.getColumn());
- }
-
- // add dimensions of the cube
- factColumns.add(new FieldSchema("zipcode", "int", "zip"));
- factColumns.add(new FieldSchema("cityid", "int", "city id"));
- factColumns.add(new FieldSchema("cityid1", "int", "city id"));
- factColumns.add(new FieldSchema("cityid2", "int", "city id"));
- factColumns.add(new FieldSchema("stateid", "int", "state id"));
- factColumns.add(new FieldSchema("countryid", "int", "country id"));
- factColumns.add(new FieldSchema("dim1", "string", "dim1"));
- factColumns.add(new FieldSchema("dim2", "int", "dim2"));
- factColumns.add(new FieldSchema("concatedCityState", "string", "citystate"));
-
- Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
- updates.add(HOURLY);
- ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
- List<String> timePartCols = new ArrayList<String>();
- partCols.add(TestCubeMetastoreClient.getDatePartition());
- timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
- StorageTableDesc s1 = new StorageTableDesc();
- s1.setInputFormat(TextInputFormat.class.getCanonicalName());
- s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s1.setPartCols(partCols);
- s1.setTimePartCols(timePartCols);
- storageAggregatePeriods.put(c1, updates);
- storageAggregatePeriods.put(c3, updates);
-
- Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
- storageTables.put(c3, s1);
-
- // create cube fact
- Map<String, String> properties = new HashMap<String, String>();
- properties.putAll(factValidityProperties);
- properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
- properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f1");
-
- client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
- storageTables);
- client.createCubeFactTable(BASE_CUBE_NAME, factName2, factColumns, storageAggregatePeriods, 100L, properties,
- storageTables);
CubeFactTable fact2 = client.getFactTable(factName);
// Add all hourly partitions for two days
Calendar cal = Calendar.getInstance();
@@ -2173,917 +709,6 @@ public class CubeTestSetup {
}
}
- private void createCubeFactMonthly(CubeMetastoreClient client) throws Exception {
- String factName = "testFactMonthly";
- List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size());
- for (CubeMeasure measure : cubeMeasures) {
- factColumns.add(measure.getColumn());
- }
-
- // add one dimension of the cube
- factColumns.add(new FieldSchema("countryid", "int", "country id"));
-
- Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
- Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
- updates.add(MONTHLY);
- ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
- List<String> timePartCols = new ArrayList<String>();
- partCols.add(TestCubeMetastoreClient.getDatePartition());
- timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
- StorageTableDesc s1 = new StorageTableDesc();
- s1.setInputFormat(TextInputFormat.class.getCanonicalName());
- s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s1.setPartCols(partCols);
- s1.setTimePartCols(timePartCols);
-
- storageAggregatePeriods.put(c2, updates);
-
- Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c2, s1);
-
- // create cube fact
- client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
- factValidityProperties, storageTables);
- }
-
- // DimWithTwoStorages
- private void createCityTable(CubeMetastoreClient client) throws Exception {
- Set<CubeDimAttribute> cityAttrs = new HashSet<CubeDimAttribute>();
- cityAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code")));
- cityAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "city name")));
- cityAttrs.add(new BaseDimAttribute(new FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity")));
- cityAttrs.add(new BaseDimAttribute(new FieldSchema("ambigdim2", "string", "used in testColumnAmbiguity")));
- cityAttrs.add(new BaseDimAttribute(new FieldSchema("nocandidatecol", "string", "used in testing no"
- + " candidate available")));
- cityAttrs.add(new BaseDimAttribute(new FieldSchema("stateid", "int", "state id")));
- cityAttrs.add(new ReferencedDimAttribute(new FieldSchema("statename", "string", "state name"), "State name",
- "citystate", "name", null, null, null, null));
- cityAttrs.add(new BaseDimAttribute(new FieldSchema("zipcode", "int", "zip code")));
- Map<String, String> dimProps = new HashMap<String, String>();
- dimProps.put(MetastoreUtil.getDimTimedDimensionKey("citydim"), TestCubeMetastoreClient.getDatePartitionKey());
- Set<ExprColumn> exprs = new HashSet<ExprColumn>();
- exprs.add(new ExprColumn(new FieldSchema("CityAddress", "string", "city with state and city and zip"),
- "City Address",
- new ExprSpec("concat(citydim.name, \":\", citystate.name, \":\", citycountry.name, \":\", cityzip.code)", null,
- null), new ExprSpec("concat(citydim.name, \":\", citystate.name)", null, null)));
- exprs.add(new ExprColumn(new FieldSchema("CityState", "string", "city's state"),
- "City State", new ExprSpec("concat(citydim.name, \":\", citydim.statename)", null, null)));
- exprs.add(new ExprColumn(new FieldSchema("AggrExpr", "int", "count(name)"), "city count",
- new ExprSpec("count(name)", null, null)));
- Set<JoinChain> joinchains = new HashSet<JoinChain>() {
- {
- add(new JoinChain("cityState", "city-state", "state thru city") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("citydim", "stateid"));
- add(new TableReference("statedim", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("citydim", "statename"));
- add(new TableReference("statedim", "name"));
- }
- });
- }
- });
- }
- };
- joinchains.add(new JoinChain("cityCountry", "cube-zip", "country thru city") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("citydim", "stateid"));
- add(new TableReference("statedim", "id"));
- add(new TableReference("statedim", "countryid"));
- add(new TableReference("countrydim", "id"));
- }
- });
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("citydim", "statename"));
- add(new TableReference("statedim", "name"));
- add(new TableReference("statedim", "countryid"));
- add(new TableReference("countrydim", "id"));
- }
- });
- }
- });
- joinchains.add(new JoinChain("cityZip", "city-zip", "Zipcode thru city") {
- {
- addPath(new ArrayList<TableReference>() {
- {
- add(new TableReference("citydim", "zipcode"));
- add(new TableReference("zipdim", "code"));
- }
- });
- }
- });
- Dimension cityDim = new Dimension("citydim", cityAttrs, exprs, joinchains, dimProps, 0L);
- client.createDimension(cityDim);
-
- String dimName = "citytable";
-
- List<FieldSchema> dimColumns = new ArrayList<FieldSchema>();
- dimColumns.add(new FieldSchema("id", "int", "code"));
- dimColumns.add(new FieldSchema("name", "string", "field1"));
- dimColumns.add(new FieldSchema("stateid", "int", "state id"));
- dimColumns.add(new FieldSchema("zipcode", "int", "zip code"));
- dimColumns.add(new FieldSchema("ambigdim1", "string", "used in" + " testColumnAmbiguity"));
- dimColumns.add(new FieldSchema("ambigdim2", "string", "used in " + "testColumnAmbiguity"));
-
- Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>();
- ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
- List<String> timePartCols = new ArrayList<String>();
- partCols.add(TestCubeMetastoreClient.getDatePartition());
- timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
- StorageTableDesc s1 = new StorageTableDesc();
- s1.setInputFormat(TextInputFormat.class.getCanonicalName());
- s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- s1.setPartCols(partCols);
- s1.setTimePartCols(timePartCols);
- Map<String, String> tblPros = Maps.newHashMap();
- tblPros.put(LensConfConstants.STORAGE_COST, "100");
- s1.setTblProps(tblPros);
- dumpPeriods.put(c1, HOURLY);
-
- StorageTableDesc s2 = new StorageTableDesc();
- s2.setInputFormat(TextInputFormat.class.getCanonicalName());
- s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
- dumpPeriods.put(c2, null);
-
- Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c1, s1);
- storageTables.put(c2, s2);
-
- client.createCubeDimensionTable(cityDim.getName(), dimName, dimColumns, 0L, dumpPeriods, dimProps, storageTables);
-
- dimName = "citytable2";
-
- dimColumns = new ArrayList<FieldSchema>();
- dimColumns.add(new FieldSchema("id", "int", "code"));
- dimColumns.add(new FieldSchema("stateid", "int", "state id"));
-
- dumpPeriods = new HashMap<String, UpdatePeriod>();
- storageTables = new HashMap<String, StorageTableDesc>();
- storageTables.put(c4, s2);
- dumpPeriods.put(c4, null);
-
- client.createCubeDimensionTable(cityDim.getName(), dimName, dimColumns, 0L, dumpPeriods, dimProps, storageTables);
-
- dimName = "citytable3";
-
- dimColumns = new ArrayList<FieldSchema>();
- dimColumns.add(new FieldSchema("id", "int", "code"));
- dimColumns.add(new FieldSchema("name", "string", "name"));
-
- client.createCubeDimensionTable(cityDim.getName(), dimName, dimColumns, 0L, dumpPeriods, dimProps, storageTables);
-
- dimName = "citytable4";
-
- dimColumns = new ArrayList<FieldSchema>();
- dimColumns.add(new Field
<TRUNCATED>