You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2014/03/28 00:42:57 UTC
svn commit: r1582547 [2/2] - in /hive/branches/branch-0.13: data/files/
metastore/if/ metastore/src/gen/thrift/gen-cpp/
metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/
metastore/src/gen/thrift/gen-php/metastore/ metastore/sr...
Modified: hive/branches/branch-0.13/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb?rev=1582547&r1=1582546&r2=1582547&view=diff
==============================================================================
--- hive/branches/branch-0.13/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb (original)
+++ hive/branches/branch-0.13/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb Thu Mar 27 23:42:56 2014
@@ -712,6 +712,52 @@ class BinaryColumnStatsData
::Thrift::Struct.generate_accessors self
end
+class Decimal
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ UNSCALED = 1
+ SCALE = 3
+
+ FIELDS = {
+ UNSCALED => {:type => ::Thrift::Types::STRING, :name => 'unscaled', :binary => true},
+ SCALE => {:type => ::Thrift::Types::I16, :name => 'scale'}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field unscaled is unset!') unless @unscaled
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field scale is unset!') unless @scale
+ end
+
+ ::Thrift::Struct.generate_accessors self
+end
+
+class DecimalColumnStatsData
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ LOWVALUE = 1
+ HIGHVALUE = 2
+ NUMNULLS = 3
+ NUMDVS = 4
+
+ FIELDS = {
+ LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Decimal},
+ HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Decimal},
+ NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'},
+ NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field lowValue is unset!') unless @lowValue
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field highValue is unset!') unless @highValue
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numNulls is unset!') unless @numNulls
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numDVs is unset!') unless @numDVs
+ end
+
+ ::Thrift::Struct.generate_accessors self
+end
+
class ColumnStatisticsData < ::Thrift::Union
include ::Thrift::Struct_Union
class << self
@@ -734,6 +780,10 @@ class ColumnStatisticsData < ::Thrift::U
def binaryStats(val)
ColumnStatisticsData.new(:binaryStats, val)
end
+
+ def decimalStats(val)
+ ColumnStatisticsData.new(:decimalStats, val)
+ end
end
BOOLEANSTATS = 1
@@ -741,13 +791,15 @@ class ColumnStatisticsData < ::Thrift::U
DOUBLESTATS = 3
STRINGSTATS = 4
BINARYSTATS = 5
+ DECIMALSTATS = 6
FIELDS = {
BOOLEANSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'booleanStats', :class => ::BooleanColumnStatsData},
LONGSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'longStats', :class => ::LongColumnStatsData},
DOUBLESTATS => {:type => ::Thrift::Types::STRUCT, :name => 'doubleStats', :class => ::DoubleColumnStatsData},
STRINGSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'stringStats', :class => ::StringColumnStatsData},
- BINARYSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'binaryStats', :class => ::BinaryColumnStatsData}
+ BINARYSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'binaryStats', :class => ::BinaryColumnStatsData},
+ DECIMALSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'decimalStats', :class => ::DecimalColumnStatsData}
}
def struct_fields; FIELDS; end
Modified: hive/branches/branch-0.13/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java?rev=1582547&r1=1582546&r2=1582547&view=diff
==============================================================================
--- hive/branches/branch-0.13/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (original)
+++ hive/branches/branch-0.13/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java Thu Mar 27 23:42:56 2014
@@ -937,8 +937,9 @@ class MetaStoreDirectSql {
/** The common query part for table and partition stats */
private static final String STATS_COLLIST =
"\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", "
- + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", "
- + "\"AVG_COL_LEN\", \"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\"";
+ + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", "
+ + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"AVG_COL_LEN\", "
+ + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" ";
private ColumnStatistics makeColumnStats(
List<Object[]> list, ColumnStatisticsDesc csd, int offset) {
@@ -948,7 +949,7 @@ class MetaStoreDirectSql {
for (Object[] row : list) {
// LastAnalyzed is stored per column but thrift has it per several;
// get the lowest for now as nobody actually uses this field.
- Object laObj = row[offset + 12];
+ Object laObj = row[offset + 14];
if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > (Long)laObj)) {
csd.setLastAnalyzed((Long)laObj);
}
@@ -957,10 +958,10 @@ class MetaStoreDirectSql {
int i = offset;
ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
- nulls = row[i++], dist = row[i++], avglen = row[i++], maxlen = row[i++],
- trues = row[i++], falses = row[i++];
+ declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
+ avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
- llow, lhigh, dlow, dhigh, nulls, dist, avglen, maxlen, trues, falses);
+ llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
csos.add(cso);
}
result.setStatsObj(csos);
Modified: hive/branches/branch-0.13/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java?rev=1582547&r1=1582546&r2=1582547&view=diff
==============================================================================
--- hive/branches/branch-0.13/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java (original)
+++ hive/branches/branch-0.13/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java Thu Mar 27 23:42:56 2014
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.metastore;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
@@ -27,6 +30,8 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
@@ -74,6 +79,11 @@ public class StatObjectConverter {
DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats();
mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(),
doubleStats.getLowValue(), doubleStats.getHighValue());
+ } else if (statsObj.getStatsData().isSetDecimalStats()) {
+ DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats();
+ String low = createJdoDecimalString(decimalStats.getLowValue()),
+ high = createJdoDecimalString(decimalStats.getHighValue());
+ mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high);
} else if (statsObj.getStatsData().isSetStringStats()) {
StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(),
@@ -94,6 +104,8 @@ public class StatObjectConverter {
oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed());
oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue());
oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue());
+ oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue());
+ oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue());
oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen());
oldStatsObj.setNumDVs(mStatsObj.getNumDVs());
oldStatsObj.setNumFalses(mStatsObj.getNumFalses());
@@ -109,6 +121,8 @@ public class StatObjectConverter {
oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed());
oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue());
oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue());
+ oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue());
+ oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue());
oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen());
oldStatsObj.setNumDVs(mStatsObj.getNumDVs());
oldStatsObj.setNumFalses(mStatsObj.getNumFalses());
@@ -160,6 +174,13 @@ public class StatObjectConverter {
doubleStats.setLowValue(mStatsObj.getDoubleLowValue());
doubleStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setDoubleStats(doubleStats);
+ } else if (colType.equals("decimal")) {
+ DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
+ decimalStats.setNumNulls(mStatsObj.getNumNulls());
+ decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue()));
+ decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue()));
+ decimalStats.setNumDVs(mStatsObj.getNumDVs());
+ colStatsData.setDecimalStats(decimalStats);
}
statsObj.setStatsData(colStatsData);
return statsObj;
@@ -203,6 +224,11 @@ public class StatObjectConverter {
DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats();
mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(),
doubleStats.getLowValue(), doubleStats.getHighValue());
+ } else if (statsObj.getStatsData().isSetDecimalStats()) {
+ DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats();
+ String low = createJdoDecimalString(decimalStats.getLowValue()),
+ high = createJdoDecimalString(decimalStats.getHighValue());
+ mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high);
} else if (statsObj.getStatsData().isSetStringStats()) {
StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(),
@@ -259,6 +285,13 @@ public class StatObjectConverter {
doubleStats.setLowValue(mStatsObj.getDoubleLowValue());
doubleStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setDoubleStats(doubleStats);
+ } else if (colType.equals("decimal")) {
+ DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
+ decimalStats.setNumNulls(mStatsObj.getNumNulls());
+ decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue()));
+ decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue()));
+ decimalStats.setNumDVs(mStatsObj.getNumDVs());
+ colStatsData.setDecimalStats(decimalStats);
}
statsObj.setStatsData(colStatsData);
return statsObj;
@@ -277,8 +310,8 @@ public class StatObjectConverter {
// SQL
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
- Object llow, Object lhigh, Object dlow, Object dhigh, Object nulls, Object dist,
- Object avglen, Object maxlen, Object trues, Object falses) {
+ Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
+ Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) {
if (colType.equals("boolean")) {
BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
boolStats.setNumFalses((Long)falses);
@@ -315,6 +348,22 @@ public class StatObjectConverter {
doubleStats.setLowValue((Double)dlow);
doubleStats.setNumDVs((Long)dist);
data.setDoubleStats(doubleStats);
+ } else if (colType.equals("decimal")) {
+ DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
+ decimalStats.setNumNulls((Long)nulls);
+ decimalStats.setHighValue(createThriftDecimal((String)dechigh));
+ decimalStats.setLowValue(createThriftDecimal((String)declow));
+ decimalStats.setNumDVs((Long)dist);
+ data.setDecimalStats(decimalStats);
}
}
+
+ private static Decimal createThriftDecimal(String s) {
+ BigDecimal d = new BigDecimal(s);
+ return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale());
+ }
+
+ private static String createJdoDecimalString(Decimal d) {
+ return new BigDecimal(new BigInteger(d.getUnscaled()), d.getScale()).toString();
+ }
}
Modified: hive/branches/branch-0.13/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java?rev=1582547&r1=1582546&r2=1582547&view=diff
==============================================================================
--- hive/branches/branch-0.13/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java (original)
+++ hive/branches/branch-0.13/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java Thu Mar 27 23:42:56 2014
@@ -44,6 +44,8 @@ public class MPartitionColumnStatistics
private long longHighValue;
private double doubleLowValue;
private double doubleHighValue;
+ private String decimalLowValue;
+ private String decimalHighValue;
private long numNulls;
private long numDVs;
private double avgColLen;
@@ -178,6 +180,14 @@ public class MPartitionColumnStatistics
this.doubleHighValue = highValue;
}
+ public void setDecimalStats(
+ long numNulls, long numNDVs, String lowValue, String highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ this.decimalLowValue = lowValue;
+ this.decimalHighValue = highValue;
+ }
+
public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
@@ -221,4 +231,20 @@ public class MPartitionColumnStatistics
public void setDoubleHighValue(double doubleHighValue) {
this.doubleHighValue = doubleHighValue;
}
+
+ public String getDecimalLowValue() {
+ return decimalLowValue;
+ }
+
+ public void setDecimalLowValue(String decimalLowValue) {
+ this.decimalLowValue = decimalLowValue;
+ }
+
+ public String getDecimalHighValue() {
+ return decimalHighValue;
+ }
+
+ public void setDecimalHighValue(String decimalHighValue) {
+ this.decimalHighValue = decimalHighValue;
+ }
}
Modified: hive/branches/branch-0.13/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java?rev=1582547&r1=1582546&r2=1582547&view=diff
==============================================================================
--- hive/branches/branch-0.13/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java (original)
+++ hive/branches/branch-0.13/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java Thu Mar 27 23:42:56 2014
@@ -42,6 +42,8 @@ public class MTableColumnStatistics {
private long longHighValue;
private double doubleLowValue;
private double doubleHighValue;
+ private String decimalLowValue;
+ private String decimalHighValue;
private long numNulls;
private long numDVs;
private double avgColLen;
@@ -168,6 +170,14 @@ public class MTableColumnStatistics {
this.doubleHighValue = highValue;
}
+ public void setDecimalStats(
+ long numNulls, long numNDVs, String lowValue, String highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ this.decimalLowValue = lowValue;
+ this.decimalHighValue = highValue;
+ }
+
public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) {
this.numNulls = numNulls;
this.numDVs = numNDVs;
@@ -212,4 +222,21 @@ public class MTableColumnStatistics {
public void setDoubleHighValue(double doubleHighValue) {
this.doubleHighValue = doubleHighValue;
}
+
+
+ public String getDecimalLowValue() {
+ return decimalLowValue;
+ }
+
+ public void setDecimalLowValue(String decimalLowValue) {
+ this.decimalLowValue = decimalLowValue;
+ }
+
+ public String getDecimalHighValue() {
+ return decimalHighValue;
+ }
+
+ public void setDecimalHighValue(String decimalHighValue) {
+ this.decimalHighValue = decimalHighValue;
+ }
}
Modified: hive/branches/branch-0.13/metastore/src/model/package.jdo
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/metastore/src/model/package.jdo?rev=1582547&r1=1582546&r2=1582547&view=diff
==============================================================================
--- hive/branches/branch-0.13/metastore/src/model/package.jdo (original)
+++ hive/branches/branch-0.13/metastore/src/model/package.jdo Thu Mar 27 23:42:56 2014
@@ -825,6 +825,12 @@
<field name="doubleHighValue">
<column name="DOUBLE_HIGH_VALUE" jdbc-type="DOUBLE" allows-null="true"/>
</field>
+ <field name="decimalLowValue">
+ <column name="BIG_DECIMAL_LOW_VALUE" jdbc-type="VARCHAR" allows-null="true"/>
+ </field>
+ <field name="decimalHighValue">
+ <column name="BIG_DECIMAL_HIGH_VALUE" jdbc-type="VARCHAR" allows-null="true"/>
+ </field>
<field name="numNulls">
<column name="NUM_NULLS" jdbc-type="BIGINT" allows-null="false"/>
</field>
@@ -883,6 +889,12 @@
<field name="doubleHighValue">
<column name="DOUBLE_HIGH_VALUE" jdbc-type="DOUBLE" allows-null="true"/>
</field>
+ <field name="decimalLowValue">
+ <column name="BIG_DECIMAL_LOW_VALUE" jdbc-type="VARCHAR" allows-null="true"/>
+ </field>
+ <field name="decimalHighValue">
+ <column name="BIG_DECIMAL_HIGH_VALUE" jdbc-type="VARCHAR" allows-null="true"/>
+ </field>
<field name="numNulls">
<column name="NUM_NULLS" jdbc-type="BIGINT" allows-null="false"/>
</field>
Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java?rev=1582547&r1=1582546&r2=1582547&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java Thu Mar 27 23:42:56 2014
@@ -20,11 +20,15 @@ package org.apache.hadoop.hive.ql.exec;
import java.io.IOException;
import java.io.Serializable;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
@@ -32,6 +36,8 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
@@ -48,6 +54,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.mapred.JobConf;
@@ -110,6 +117,27 @@ public class ColumnStatsTask extends Tas
}
}
+ private void unpackDecimalStats(ObjectInspector oi, Object o, String fName,
+ ColumnStatisticsObj statsObj) {
+ if (fName.equals("countnulls")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getDecimalStats().setNumNulls(v);
+ } else if (fName.equals("numdistinctvalues")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getDecimalStats().setNumDVs(v);
+ } else if (fName.equals("max")) {
+ HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(d));
+ } else if (fName.equals("min")) {
+ HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
+ statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d));
+ }
+ }
+
+ private Decimal convertToThriftDecimal(HiveDecimal d) {
+ return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale());
+ }
+
private void unpackLongStats(ObjectInspector oi, Object o, String fName,
ColumnStatisticsObj statsObj) {
if (fName.equals("countnulls")) {
@@ -186,6 +214,10 @@ public class ColumnStatsTask extends Tas
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
statsData.setBinaryStats(binaryStats);
statsObj.setStatsData(statsData);
+ } else if (s.equalsIgnoreCase("decimal")) {
+ DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
+ statsData.setDecimalStats(decimalStats);
+ statsObj.setStatsData(statsData);
}
} else {
// invoke the right unpack method depending on data type of the column
@@ -199,6 +231,8 @@ public class ColumnStatsTask extends Tas
unpackStringStats(oi, o, fieldName, statsObj);
} else if (statsObj.getStatsData().isSetBinaryStats()) {
unpackBinaryStats(oi, o, fieldName, statsObj);
+ } else if (statsObj.getStatsData().isSetDecimalStats()) {
+ unpackDecimalStats(oi, o, fieldName, statsObj);
}
}
}
Added: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java?rev=1582547&view=auto
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java (added)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java Thu Mar 27 23:42:56 2014
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+public class DecimalNumDistinctValueEstimator extends NumDistinctValueEstimator {
+
+ public DecimalNumDistinctValueEstimator(int numBitVectors) {
+ super(numBitVectors);
+ }
+
+ public DecimalNumDistinctValueEstimator(String s, int numBitVectors) {
+ super(s, numBitVectors);
+ }
+
+ public void addToEstimator(HiveDecimal decimal) {
+ int v = decimal.hashCode();
+ super.addToEstimator(v);
+ }
+
+ public void addToEstimatorPCSA(HiveDecimal decimal) {
+ int v = decimal.hashCode();
+ super.addToEstimatorPCSA(v);
+ }
+}
Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1582547&r1=1582546&r2=1582547&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java Thu Mar 27 23:42:56 2014
@@ -17,28 +17,26 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
+import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.BytesWritable;
@@ -88,9 +86,11 @@ public class GenericUDAFComputeStats ext
return new GenericUDAFStringStatsEvaluator();
case BINARY:
return new GenericUDAFBinaryStatsEvaluator();
+ case DECIMAL:
+ return new GenericUDAFDecimalStatsEvaluator();
default:
throw new UDFArgumentTypeException(0,
- "Only integer/long/timestamp/float/double/string/binary/boolean type argument " +
+ "Only integer/long/timestamp/float/double/string/binary/boolean/decimal type argument " +
"is accepted but "
+ parameters[0].getTypeName() + " is passed.");
}
@@ -1474,4 +1474,305 @@ public class GenericUDAFComputeStats ext
return result;
}
}
+
+ public static class GenericUDAFDecimalStatsEvaluator extends GenericUDAFEvaluator {
+
+ /*
+ * Object Inspectors for the input parameters: the column value and the number of bit vectors.
+ */
+ private transient PrimitiveObjectInspector inputOI;
+ private transient PrimitiveObjectInspector numVectorsOI;
+ private final static int MAX_BIT_VECTORS = 1024;
+
+ /* Partial aggregation result returned by terminatePartial. Partial result is a struct
+ * containing the column type, min, max, null count, serialized NDV bit vector and bit vector count.
+ */
+ private transient Object[] partialResult;
+
+ /* Object Inspectors corresponding to the struct returned by terminatePartial and to the
+ * fields within that struct: Min, Max, CountNulls, BitVector and NumBitVectors.
+ */
+ private transient StructObjectInspector soi;
+
+ private transient StructField minField;
+ private transient WritableHiveDecimalObjectInspector minFieldOI;
+
+ private transient StructField maxField;
+ private transient WritableHiveDecimalObjectInspector maxFieldOI;
+
+ private transient StructField countNullsField;
+ private transient WritableLongObjectInspector countNullsFieldOI;
+
+ private transient StructField ndvField;
+ private transient WritableStringObjectInspector ndvFieldOI;
+
+ private transient StructField numBitVectorsField;
+ private transient WritableIntObjectInspector numBitVectorsFieldOI;
+
+ /* Output of final result of the aggregation
+ */
+ private transient Object[] result;
+
+ private boolean warned = false;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+
+ // initialize input
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ numVectorsOI = (PrimitiveObjectInspector) parameters[1];
+ } else {
+ soi = (StructObjectInspector) parameters[0];
+
+ minField = soi.getStructFieldRef("Min");
+ minFieldOI = (WritableHiveDecimalObjectInspector) minField.getFieldObjectInspector();
+
+ maxField = soi.getStructFieldRef("Max");
+ maxFieldOI = (WritableHiveDecimalObjectInspector) maxField.getFieldObjectInspector();
+
+ countNullsField = soi.getStructFieldRef("CountNulls");
+ countNullsFieldOI = (WritableLongObjectInspector) countNullsField.getFieldObjectInspector();
+
+ ndvField = soi.getStructFieldRef("BitVector");
+ ndvFieldOI = (WritableStringObjectInspector) ndvField.getFieldObjectInspector();
+
+ numBitVectorsField = soi.getStructFieldRef("NumBitVectors");
+ numBitVectorsFieldOI = (WritableIntObjectInspector)
+ numBitVectorsField.getFieldObjectInspector();
+ }
+
+ // initialize output
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+
+ List<String> fname = new ArrayList<String>();
+ fname.add("ColumnType");
+ fname.add("Min");
+ fname.add("Max");
+ fname.add("CountNulls");
+ fname.add("BitVector");
+ fname.add("NumBitVectors");
+
+ partialResult = new Object[6];
+ partialResult[0] = new Text();
+ partialResult[1] = new HiveDecimalWritable(HiveDecimal.create(0));
+ partialResult[2] = new HiveDecimalWritable(HiveDecimal.create(0));
+ partialResult[3] = new LongWritable(0);
+ partialResult[4] = new Text();
+ partialResult[5] = new IntWritable(0);
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
+ foi);
+ } else {
+ List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+
+ List<String> fname = new ArrayList<String>();
+ fname.add("ColumnType");
+ fname.add("Min");
+ fname.add("Max");
+ fname.add("CountNulls");
+ fname.add("NumDistinctValues");
+
+ result = new Object[5];
+ result[0] = new Text();
+ result[1] = new HiveDecimalWritable(HiveDecimal.create(0));
+ result[2] = new HiveDecimalWritable(HiveDecimal.create(0));
+ result[3] = new LongWritable(0);
+ result[4] = new LongWritable(0);
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
+ foi);
+ }
+ }
+
+ @AggregationType(estimable = true)
+ public static class DecimalStatsAgg extends AbstractAggregationBuffer {
+ public String columnType;
+ public HiveDecimal min; /* Minimum value seen so far */
+ public HiveDecimal max; /* Maximum value seen so far */
+ public long countNulls; /* Count of number of null values seen so far */
+ public DecimalNumDistinctValueEstimator numDV; /* Distinct value estimator */
+ public boolean firstItem; /* First item in the aggBuf? */
+ public int numBitVectors;
+ @Override
+ public int estimate() {
+ JavaDataModel model = JavaDataModel.get();
+ return model.primitive1() * 2 + model.primitive2() + model.lengthOfDecimal() * 2 +
+ model.lengthFor(columnType) + model.lengthFor(numDV);
+ }
+ };
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ DecimalStatsAgg result = new DecimalStatsAgg();
+ reset(result);
+ return result;
+ }
+
+ public void initNDVEstimator(DecimalStatsAgg aggBuffer, int numBitVectors) {
+ aggBuffer.numDV = new DecimalNumDistinctValueEstimator(numBitVectors);
+ aggBuffer.numDV.reset();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ DecimalStatsAgg myagg = (DecimalStatsAgg) agg;
+ myagg.columnType = new String("Decimal");
+ myagg.min = HiveDecimal.create(0);
+ myagg.max = HiveDecimal.create(0);
+ myagg.countNulls = 0;
+ myagg.firstItem = true;
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ Object p = parameters[0];
+ DecimalStatsAgg myagg = (DecimalStatsAgg) agg;
+ boolean emptyTable = false;
+
+ if (parameters[1] == null) {
+ emptyTable = true;
+ }
+
+ if (myagg.firstItem) {
+ int numVectors = 0;
+ if (!emptyTable) {
+ numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI);
+ }
+
+ if (numVectors > MAX_BIT_VECTORS) {
+ throw new HiveException("The maximum allowed value for number of bit vectors " +
+ " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors");
+ }
+
+ initNDVEstimator(myagg, numVectors);
+ myagg.firstItem = false;
+ myagg.numBitVectors = numVectors;
+ }
+
+ if (!emptyTable) {
+
+ //Update null counter if a null value is seen
+ if (p == null) {
+ myagg.countNulls++;
+ }
+ else {
+ try {
+
+ HiveDecimal v = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI);
+
+ //Update min counter if new value is less than min seen so far
+ if (v.compareTo(myagg.min) < 0) {
+ myagg.min = v;
+ }
+
+ //Update max counter if new value is greater than max seen so far
+ if (v.compareTo(myagg.max) > 0) {
+ myagg.max = v;
+ }
+
+ // Add value to NumDistinctValue Estimator
+ myagg.numDV.addToEstimator(v);
+
+ } catch (NumberFormatException e) {
+ if (!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " "
+ + StringUtils.stringifyException(e));
+ LOG.warn(getClass().getSimpleName()
+ + " ignoring similar exceptions.");
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ DecimalStatsAgg myagg = (DecimalStatsAgg) agg;
+
+ // Serialize numDistinctValue Estimator
+ Text t = myagg.numDV.serialize();
+
+ // Serialize the rest of the values in the AggBuffer
+ ((Text) partialResult[0]).set(myagg.columnType);
+ ((HiveDecimalWritable) partialResult[1]).set(myagg.min);
+ ((HiveDecimalWritable) partialResult[2]).set(myagg.max);
+ ((LongWritable) partialResult[3]).set(myagg.countNulls);
+ ((Text) partialResult[4]).set(t);
+ ((IntWritable) partialResult[5]).set(myagg.numBitVectors);
+
+ return partialResult;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ DecimalStatsAgg myagg = (DecimalStatsAgg) agg;
+
+ if (myagg.firstItem) {
+ Object partialValue = soi.getStructFieldData(partial, numBitVectorsField);
+ int numVectors = numBitVectorsFieldOI.get(partialValue);
+ initNDVEstimator(myagg, numVectors);
+ myagg.firstItem = false;
+ myagg.numBitVectors = numVectors;
+ }
+
+ // Update min if min is lesser than the smallest value seen so far
+ Object partialValue = soi.getStructFieldData(partial, minField);
+ if (myagg.min.compareTo(minFieldOI.getPrimitiveJavaObject(partialValue)) > 0) {
+ myagg.min = minFieldOI.getPrimitiveJavaObject(partialValue);
+ }
+
+ // Update max if max is greater than the largest value seen so far
+ partialValue = soi.getStructFieldData(partial, maxField);
+ if (myagg.max.compareTo(maxFieldOI.getPrimitiveJavaObject(partialValue)) < 0) {
+ myagg.max = maxFieldOI.getPrimitiveJavaObject(partialValue);
+ }
+
+ // Update the null counter
+ partialValue = soi.getStructFieldData(partial, countNullsField);
+ myagg.countNulls += countNullsFieldOI.get(partialValue);
+
+ // Merge numDistinctValue Estimators
+ partialValue = soi.getStructFieldData(partial, ndvField);
+ String v = ndvFieldOI.getPrimitiveJavaObject(partialValue);
+
+ NumDistinctValueEstimator o = new NumDistinctValueEstimator(v, myagg.numBitVectors);
+ myagg.numDV.mergeEstimators(o);
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ DecimalStatsAgg myagg = (DecimalStatsAgg) agg;
+ long numDV = 0;
+
+ if (myagg.numBitVectors != 0) {
+ numDV = myagg.numDV.estimateNumDistinctValues();
+ }
+
+ // Serialize the result struct
+ ((Text) result[0]).set(myagg.columnType);
+ ((HiveDecimalWritable) result[1]).set(myagg.min);
+ ((HiveDecimalWritable) result[2]).set(myagg.max);
+ ((LongWritable) result[3]).set(myagg.countNulls);
+ ((LongWritable) result[4]).set(numDV);
+
+ return result;
+ }
+ }
}
Added: hive/branches/branch-0.13/ql/src/test/queries/clientpositive/compute_stats_decimal.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/queries/clientpositive/compute_stats_decimal.q?rev=1582547&view=auto
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/queries/clientpositive/compute_stats_decimal.q (added)
+++ hive/branches/branch-0.13/ql/src/test/queries/clientpositive/compute_stats_decimal.q Thu Mar 27 23:42:56 2014
@@ -0,0 +1,11 @@
+set hive.stats.autogather=true;
+
+create table tab_decimal(a decimal(10,3));
+
+-- insert some data
+LOAD DATA LOCAL INPATH "../../data/files/decimal.txt" INTO TABLE tab_decimal;
+
+select count(*) from tab_decimal;
+
+-- compute statistical summary of data
+select compute_stats(a, 18) from tab_decimal;
Added: hive/branches/branch-0.13/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/results/clientpositive/compute_stats_decimal.q.out?rev=1582547&view=auto
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/results/clientpositive/compute_stats_decimal.q.out (added)
+++ hive/branches/branch-0.13/ql/src/test/results/clientpositive/compute_stats_decimal.q.out Thu Mar 27 23:42:56 2014
@@ -0,0 +1,37 @@
+PREHOOK: query: create table tab_decimal(a decimal(10,3))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table tab_decimal(a decimal(10,3))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_decimal
+PREHOOK: query: -- insert some data
+LOAD DATA LOCAL INPATH "../../data/files/decimal.txt" INTO TABLE tab_decimal
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tab_decimal
+POSTHOOK: query: -- insert some data
+LOAD DATA LOCAL INPATH "../../data/files/decimal.txt" INTO TABLE tab_decimal
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tab_decimal
+PREHOOK: query: select count(*) from tab_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab_decimal
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab_decimal
+#### A masked pattern was here ####
+18
+PREHOOK: query: -- compute statistical summary of data
+select compute_stats(a, 18) from tab_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab_decimal
+#### A masked pattern was here ####
+POSTHOOK: query: -- compute statistical summary of data
+select compute_stats(a, 18) from tab_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab_decimal
+#### A masked pattern was here ####
+{"columntype":"Decimal","min":-87.2,"max":435.331,"countnulls":2,"numdistinctvalues":13}