You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by cw...@apache.org on 2012/11/07 05:55:04 UTC

svn commit: r1406465 [1/15] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/ metastore/if/ metastore/src/gen/thrift/gen-cpp/ metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ metastore/src/gen/t...

Author: cws
Date: Wed Nov  7 04:55:00 2012
New Revision: 1406465

URL: http://svn.apache.org/viewvc?rev=1406465&view=rev
Log:
HIVE-1362. Column level scalar valued statistics on Tables and Partitions (Shreepadma Venugopalan via cws)

Added:
    hive/trunk/data/files/UserVisits.dat
    hive/trunk/data/files/binary.txt   (with props)
    hive/trunk/data/files/bool.txt   (with props)
    hive/trunk/data/files/double.txt   (with props)
    hive/trunk/data/files/employee.dat
    hive/trunk/data/files/employee2.dat
    hive/trunk/data/files/employee_part.txt   (with props)
    hive/trunk/data/files/int.txt   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatistics.java   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsData.java   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsDesc.java   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsObj.java   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/InvalidInputException.java   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java   (with props)
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java   (with props)
    hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java   (with props)
    hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java   (with props)
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java   (with props)
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java   (with props)
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java   (with props)
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java   (with props)
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DoubleNumDistinctValueEstimator.java   (with props)
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java   (with props)
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java   (with props)
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java   (with props)
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java   (with props)
    hive/trunk/ql/src/test/queries/clientnegative/columnstats_partlvl_dp.q
    hive/trunk/ql/src/test/queries/clientnegative/columnstats_partlvl_incorrect_num_keys.q
    hive/trunk/ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q
    hive/trunk/ql/src/test/queries/clientnegative/columnstats_partlvl_multiple_part_clause.q
    hive/trunk/ql/src/test/queries/clientnegative/columnstats_tbllvl.q
    hive/trunk/ql/src/test/queries/clientnegative/columnstats_tbllvl_complex_type.q
    hive/trunk/ql/src/test/queries/clientnegative/columnstats_tbllvl_incorrect_column.q
    hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q
    hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q
    hive/trunk/ql/src/test/queries/clientpositive/compute_stats_binary.q
    hive/trunk/ql/src/test/queries/clientpositive/compute_stats_boolean.q
    hive/trunk/ql/src/test/queries/clientpositive/compute_stats_double.q
    hive/trunk/ql/src/test/queries/clientpositive/compute_stats_long.q
    hive/trunk/ql/src/test/queries/clientpositive/compute_stats_string.q
    hive/trunk/ql/src/test/results/clientnegative/columnstats_partlvl_dp.q.out
    hive/trunk/ql/src/test/results/clientnegative/columnstats_partlvl_incorrect_num_keys.q.out
    hive/trunk/ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out
    hive/trunk/ql/src/test/results/clientnegative/columnstats_partlvl_multiple_part_clause.q.out
    hive/trunk/ql/src/test/results/clientnegative/columnstats_tbllvl.q.out
    hive/trunk/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out
    hive/trunk/ql/src/test/results/clientnegative/columnstats_tbllvl_incorrect_column.q.out
    hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
    hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
    hive/trunk/ql/src/test/results/clientpositive/compute_stats_binary.q.out
    hive/trunk/ql/src/test/results/clientpositive/compute_stats_boolean.q.out
    hive/trunk/ql/src/test/results/clientpositive/compute_stats_double.q.out
    hive/trunk/ql/src/test/results/clientpositive/compute_stats_long.q.out
    hive/trunk/ql/src/test/results/clientpositive/compute_stats_string.q.out
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml.template
    hive/trunk/metastore/if/hive_metastore.thrift
    hive/trunk/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp
    hive/trunk/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h
    hive/trunk/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp
    hive/trunk/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
    hive/trunk/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/EnvironmentContext.java
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Schema.java
    hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java
    hive/trunk/metastore/src/gen/thrift/gen-php/hive_metastore/ThriftHiveMetastore.php
    hive/trunk/metastore/src/gen/thrift/gen-php/hive_metastore/hive_metastore_types.php
    hive/trunk/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote
    hive/trunk/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py
    hive/trunk/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
    hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
    hive/trunk/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
    hive/trunk/metastore/src/model/package.jdo
    hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
    hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
    hive/trunk/ql/build.xml
    hive/trunk/ql/if/queryplan.thrift
    hive/trunk/ql/ivy.xml
    hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp
    hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h
    hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java
    hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php
    hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py
    hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
    hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Nov  7 04:55:00 2012
@@ -557,6 +557,9 @@ public class HiveConf extends Configurat
     HIVE_STATS_RELIABLE("hive.stats.reliable", false),
     // Collect table access keys information for operators that can benefit from bucketing
     HIVE_STATS_COLLECT_TABLEKEYS("hive.stats.collect.tablekeys", false),
+    // Standard error (in percent) allowed for NDV (number of distinct values) estimates. A lower
+    // value indicates higher accuracy and a higher compute cost.
+    HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0),
 
     // Concurrency
     HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false),

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Wed Nov  7 04:55:00 2012
@@ -1081,6 +1081,13 @@
 </property>
 
 <property>
+  <name>hive.stats.ndv.error</name>
+  <value>20.0</value>
+  <description>Standard error expressed in percentage. Provides a tradeoff between accuracy and compute cost. A lower value for error indicates higher accuracy and a higher compute cost.
+  </description>
+</property>
+
+<property>
   <name>hive.support.concurrency</name>
   <value>false</value>
   <description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>

Added: hive/trunk/data/files/UserVisits.dat
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/UserVisits.dat?rev=1406465&view=auto
==============================================================================
--- hive/trunk/data/files/UserVisits.dat (added)
+++ hive/trunk/data/files/UserVisits.dat Wed Nov  7 04:55:00 2012
@@ -0,0 +1,55 @@
+170.131.22.2|13rdgckzlcblruc.html|1984-8-7|336.869186722|NuSearch Spider|HUN|HUN-NL|remnants|3
+162.114.4.2|6xpirzjeytxdjsmwtmyeugkesratmpvamliekrijlgmvyyrslqwgw.html|1978-1-9|331.791153595|Superdownloads Spiderma|AUT|AUT-ZR|MHD|8
+177.110.45.18|11zvmoamsyaameokoeylbkivgquksibqbalnpmailbiyfxitbhfdroyxesixbjndkyqzl.html|1986-9-25|411.968497603|Mozilla/4.0|FLK|FLK-GB|apj@as.arizona.edu.|7
+157.111.12.37|44mvdnls.html|2002-7-3|486.660926201|PHP/4.0.|FIN|FIN-CZ|diffuse|3
+161.100.45.22|14ceyigx.html|1978-10-26|399.80234522|NP/0.1|BEN|BEN-CA|region|8
+164.118.48.16|28axfinfqwdcwoorukpwqvqoxxeuivbniclnkytavwdslrj.html|1996-12-8|298.335411612|MSNBOT_Mobile MSMOBOT Mozilla/2.0|USA|USA-IO|medium|1
+153.104.13.11|19aysprojntmnwymfdkaznbqxprxxaissjqkzhzivsvipuvuxfuxsvnqlfnigvby.html|1976-10-6|146.309480768|WebSearch.COM.AU/3.0.1|MCO|MCO-YD|state|5
+150.112.45.27|12hcaewxiswjeezfnlulkenwubaqsitpuarufosogoxls.html|1995-6-19|173.469334335|WinkBot/0.06|PHL|PHL-NN|important|2
+152.108.39.16|36umg.html|1974-3-28|269.969215988|GSiteCrawler/v1.xx rev. xxx|MNG|MNG-HI|...)|6
+174.119.41.16|60yxoboskwpyfin.html|2002-7-17|436.113482675|Infoseek SideWinder/2.0B|NIC|NIC-JP|data|1
+165.116.21.12|70difiadhmrvragggmoaufnuwwbakbjntnwzvxcdjtybufiarwbmcphzmizwkikewh.html|1984-2-6|13.099044572|WWWeasel Robot v1.00|THA|THA-FO|bubbles|6
+155.128.42.14|21brkepinqiwvtmfmebjckkhwevhxaesogkykzgyqpuligrul.html|1986-7-29|347.800952938|Mozilla/4.0 compatible ZyBorg/1.0|IRN|IRN-YS|conduction|1
+156.131.31.12|14nbaaoablhxrlvbfgrwcxktvshtkoqzddbdepegbmesxztdglzjjkc.html|2002-7-30|85.7691140217|Java1.1.xx.|BRA|BRA-BL|circumstellar|9
+159.122.42.18|4xfydvopxveeduudfzodxkbczvdlzou.html|1989-9-20|332.572440865|Metaeuro Web Crawler/0.2|LUX|LUX-SD|kinematics|7
+151.104.39.45|65psclahgvasawczpyicyxkuqzwpbowghmzkxzsdvtwwpzvfydiwbsqrrmhtbezjqyuo.html|2002-1-13|190.528735328|JobSpider_BA/1.|UGA|UGA-PY|pulsars:|7
+159.132.24.22|18vhcbzhhblfbayejcybyibwqsgzlkmswizyjzgrbrw.html|1978-1-2|182.368755789|Piffany_Web_Scraper_v0.|ITA|ITA-NJ|nonthermal|1
+170.101.17.16|40prmxavsjoizdzkgsncesndxebatfwvrmmejnacxol.html|1989-9-1|41.4163486896|Mozilla/4.01 [en]|ZAF|ZAF-AK|Scuti|6
+171.124.38.2|29nripzogexadckoiaoafxvtkrxksdqgveydtxsabpbfsltbmibrfwlqojagmr.html|1979-6-12|192.085693167|IconSurf/2.0 favicon monitor|SVN|SVN-DY|systems|5
+178.128.29.41|24tmrndfialwvkwybuspjyexlkiamebwtvilimqqncnimkgofzepximj.html|2000-7-8|276.89796127|obidos-bot|SLB|SLB-RL|(...|4
+175.101.24.43|70dcfbcotdzhfhuhquyosbcviglrkrakddmifpxzswg.html|1978-3-16|131.775726872|Mozilla/4.0|BMU|BMU-BR|spiral|6
+155.102.37.30|99cyllzbnsowifxdxsdmiseiceeriaaoucmgnlhaewxmbvqynulwmpepujhckhqfjdmxpuyt.html|1975-5-4|311.052004479|WebSearch.COM.AU/3.0.1|NLD|NLD-GX|Herbig-Haro|6
+156.105.11.18|1nczmzpivhbgn.html|1992-9-19|36.9747263531|Search/1.0|GLP|GLP-DJ|observations|3
+164.115.38.23|79bvcojctkaugbcterbzfykwvesklokgilbkalntvoocqqvuixunvekqjcburlbzxckxnyrjm.html|1991-4-20|267.047961774|Journster.com RSS/Atom aggregator 0.5|HKG|HKG-PK|radio|2
+179.133.2.36|12azizhsdhdgdpidjgmdeyzmfhdwsbezbeyjegcioforvxvfehjigiulqyhizmhargkwmmeartsnrosvvbdbkynawvi.html|1999-12-9|481.463770712|LeechGet 200x|SCG|SCG-XF|instruments|8
+178.107.45.18|45mbziaowxegkhzcmbsyrextgqjbyezodmqduqrqnwxydwaqytopxmidcsfbwfparfemvwdjtaiwxjnvcclaotdrmjs.html|1983-4-13|51.6686671965|IlTrovatore/1.2|HND|HND-AN|dynamics|2
+162.117.17.14|17tkabzxynnqswezhqmkvrlfycpmxqowlhgligihuwxmscmasylopwuozjawaotlwaxfggmack.html|2001-12-24|161.048060104|Mozilla/4.5 [en]C-CCK-MCD {TLC;RETAIL}|RWA|RWA-QE|rays|9
+178.119.40.7|48amqtmqxsjgrmjkszztfpegqzapidysnze.html|1987-4-3|492.988714137|Mozilla/4.0|AUT|AUT-ZR|cosmology:|8
+160.119.18.18|15yufqaoxpuqwb.html|1979-7-22|394.694548614|scooter-venus-3.0.vn|MCO|MCO-YD|outflows|1
+162.112.21.25|21boum.html|1991-2-6|165.368136543|LinkProver 2.|TCA|TCA-IS|spots|8
+176.112.31.17|20gblxgjcvpu.html|1991-8-5|78.2740990152|Mozilla/4.0|BMU|BMU-BR|masses|2
+166.130.12.13|9izokfebomgsiifyzrsepbbemutvj.html|2003-12-5|188.600736756|WWW-Mechanize/1.1|TGO|TGO-WB|bursts|5
+171.100.18.39|97sxfsgahjujwzlszmxkahyslcobrrlx.html|1985-11-21|143.277058506|Overture-WebCrawler/3.8/Fresh|SAU|SAU-KL|interferometric|5
+152.122.43.35|85zdszgzonsxkqbrkthtceiuzjsedwvghvkzvqzj.html|1989-12-1|315.628996565|moget/x.x|UMI|UMI-VU|Galaxy:|2
+157.133.36.37|15xnilzhtqjsxhhbzazrflznupllyhvdbsqjeqqyharfiyhhyhzdszrnpcyoktslljvqam.html|1990-3-20|426.498017786|PrivacyFinder/1.|UZB|UZB-ZJ|nebulae|7
+161.134.11.11|96kvrofepctfbesrphjiznjktygntkkubupsjvxyxrdzvwrkeasdobohauvueg.html|1984-6-6|280.039128409|Waypath development crawler - info at waypath dot co|IDN|IDN-BH|supergiants|6
+163.123.23.13|19rkrtwumqwmnnzisxyeesqacwolpypyxhipaejnvfzitzrlwqqbigblcqxrpnqmuybudkiyqhhjgzvdpleysg.html|1977-10-11|86.3390049695|Opera/5.0|LSO|LSO-PW|testing|7
+166.126.40.21|52ejufqiidwioozorbnsjxezfwaucndbihldnblvehdtwchoeuhoslnyioslbwmkdynrzymegpy.html|1990-10-20|125.582281932|Mozilla/4.0|BTN|BTN-HP|catalogs|9
+158.133.10.19|87nzdhsnzhkylakazmkvctgaaxtrafpxscxvjqijxthitrj.html|1982-10-5|481.583542862|larbin|GAB|GAB-CS|angular|8
+173.104.45.8|49sdptdphxjlbiwrbbrsebwqquadx.html|1981-5-2|41.3182727245|LECodeChecker/3.0 libgetdoc/1.|AUS|AUS-AV|bands|6
+160.101.31.43|6lrepnctlanokfhla.html|1973-9-7|133.29867101|sogou develop spide|SWE|SWE-TM|time|5
+150.127.33.8|22oeawpxhqahkvtaecwp.html|1999-3-16|398.882494477|W3C-WebCon/5.x.x libwww/5.x.|ISR|ISR-DY|history|1
+154.114.47.36|2mzzsgievabpkaoqegadbbjxwkutdisnvrmox.html|1981-7-24|332.760102125|mammoth/1.0|AUT|AUT-ZR|FUNCTION|3
+155.108.15.24|22beewtbnpw.html|1996-6-7|393.470347637|Scrubby/3.0|ABW|ABW-NB|horizontal-branch|4
+177.120.40.39|48itvyjulckeddslsuayoguojzhvqvmfgvyctiwflhj.html|1977-8-12|239.601807636|webmeasurement-bot, http://rvs.informatik.uni-leipzig.d|WSM|WSM-UF|are|3
+179.123.41.31|46eppnympstjuhivvpritvotqmivgsfmdkbtxafns.html|2001-11-26|258.55616439|Mozilla/2.0|SYR|SYR-XP|photometric|1
+175.100.9.4|32fjrnrlabonc.html|1988-10-22|344.394849153|Snapbot/1.|GUF|GUF-KP|acceleration|2
+155.126.7.17|72wufwnsdsqncftnvdcunnknzqnaiyflmcgsytkbmbpogicblew.html|1981-12-5|398.334494319|UKWizz/Nutch-0.8.1|NIC|NIC-JP|Kuiper|4
+150.118.20.31|1mbyargbxtnjtivflxzzredcfbtehxbxjcwkucmrwaaqiwvutuulzxnezhi.html|1982-8-27|168.936669894|Mozilla/4.0|IRL|IRL-NN|cataclysmic|5
+177.116.39.36|84maivbmcqggefkjtsde.html|1982-6-11|88.121669797|Mozilla/4.0|ARE|ARE-MX|instruments|1
+168.119.19.26|73vhjursdvxateuvrxsspwwfdbsoqfegeannuegyadzuitparisgfomiqfxhkcnocacxfivfmuzuopvfynmdcyl.html|1991-11-17|397.829289621|webbandit/4.xx.|NIC|NIC-JP|dust|2
+154.100.36.32|57rylepuglpfqvjwkxgrtftvqkjzjwsznjyzontuzizqdimofsfzxzuojeot.html|1999-1-5|334.714055649|RRC|GTM|GTM-VH|blue|7
+153.112.2.11|6pkwxtlgkkxoqtxpgrullqxjauquvmlkcwhzpsgzdeotymieddqpu.html|1975-8-6|348.218411093|Wotbox/alpha0.6|MNP|MNP-UD|supernovae:|6
+150.107.15.22|53gohsgrvrjgfptttlpfipgsnijsrhxsyeggwnysfhykxrdqdsvlicdwkmpcumut.html|1978-8-2|355.771603423|Mozilla/3.0|DEU|DEU-PU|stars|4
+150.126.27.44|0rgxbnwiqebsmszpkvfpxvhkleebngzxxgvzt.html|1989-5-18|467.800755054|Mozilla/3.01|ZWE|ZWE-TS|system|3
+151.101.32.3|34btbqii.html|1998-8-1|131.055972797|Orca Browser|THA|THA-FO|late-type|5

Added: hive/trunk/data/files/binary.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/binary.txt?rev=1406465&view=auto
==============================================================================
--- hive/trunk/data/files/binary.txt (added)
+++ hive/trunk/data/files/binary.txt Wed Nov  7 04:55:00 2012
@@ -0,0 +1,10 @@
+the quick brown fox jumped over the lazy little dog
+today is nice outside
+the quick brown fox jumped over the lazy little dog
+
+wikipedia is a great source of information
+the quick brown fox jumped over the lazy little dog
+
+estimating the number of distinct values is a hard problem
+
+the quick brown fox jumped over the lazy little dog

Propchange: hive/trunk/data/files/binary.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: hive/trunk/data/files/bool.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/bool.txt?rev=1406465&view=auto
==============================================================================
--- hive/trunk/data/files/bool.txt (added)
+++ hive/trunk/data/files/bool.txt Wed Nov  7 04:55:00 2012
@@ -0,0 +1,33 @@
+true
+false
+true
+true
+true
+false
+false
+false
+false
+true
+true
+true
+true
+false
+
+false
+true
+true
+false
+false
+false
+false
+false
+false
+false
+false
+true
+false
+false
+false
+true
+true
+false

Propchange: hive/trunk/data/files/bool.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: hive/trunk/data/files/double.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/double.txt?rev=1406465&view=auto
==============================================================================
--- hive/trunk/data/files/double.txt (added)
+++ hive/trunk/data/files/double.txt Wed Nov  7 04:55:00 2012
@@ -0,0 +1,16 @@
+55.33
+44.2
+435.33
+324.33
+324.33
+44.2
+55.3
+55.3
+0.0
+
+66.4
+23.22
+-87.2
+
+33.44
+55.3

Propchange: hive/trunk/data/files/double.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: hive/trunk/data/files/employee.dat
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/employee.dat?rev=1406465&view=auto
==============================================================================
--- hive/trunk/data/files/employee.dat (added)
+++ hive/trunk/data/files/employee.dat Wed Nov  7 04:55:00 2012
@@ -0,0 +1,13 @@
+16|john
+17|robert
+18|andrew
+19|katty
+21|tom
+22|tim
+23|james
+24|paul
+27|edward
+29|alan
+31|kerry
+34|terri
+

Added: hive/trunk/data/files/employee2.dat
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/employee2.dat?rev=1406465&view=auto
==============================================================================
--- hive/trunk/data/files/employee2.dat (added)
+++ hive/trunk/data/files/employee2.dat Wed Nov  7 04:55:00 2012
@@ -0,0 +1,7 @@
+16|john
+17|robert
+18|andrew
+19|katty
+27|edward
+29|alan
+31|kerry

Added: hive/trunk/data/files/employee_part.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/employee_part.txt?rev=1406465&view=auto
==============================================================================
--- hive/trunk/data/files/employee_part.txt (added)
+++ hive/trunk/data/files/employee_part.txt Wed Nov  7 04:55:00 2012
@@ -0,0 +1,9 @@
+16|john|4000|USA
+17|robert|2000|USA
+18|andrew|4000|USA
+19|katty|2000|USA
+27|edward|4000|UK
+29|alan|3000|UK
+31|kerry|4000|UK
+34|tom|3000|UK
+35|zack|2000|UK

Propchange: hive/trunk/data/files/employee_part.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: hive/trunk/data/files/int.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/int.txt?rev=1406465&view=auto
==============================================================================
--- hive/trunk/data/files/int.txt (added)
+++ hive/trunk/data/files/int.txt Wed Nov  7 04:55:00 2012
@@ -0,0 +1,12 @@
+4
+252
+233
+
+343
+43
+45
+344
+22
+54
+8
+13

Propchange: hive/trunk/data/files/int.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: hive/trunk/metastore/if/hive_metastore.thrift
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/if/hive_metastore.thrift?rev=1406465&r1=1406464&r2=1406465&view=diff
==============================================================================
--- hive/trunk/metastore/if/hive_metastore.thrift (original)
+++ hive/trunk/metastore/if/hive_metastore.thrift Wed Nov  7 04:55:00 2012
@@ -194,6 +194,67 @@ struct Index {
   10: bool         deferredRebuild
 }
 
+// column statistics
+struct BooleanColumnStatsData {
+1: required i64 numTrues,
+2: required i64 numFalses,
+3: required i64 numNulls
+}
+
+struct DoubleColumnStatsData {
+1: required double lowValue,
+2: required double highValue,
+3: required i64 numNulls,
+4: required i64 numDVs
+}
+
+struct LongColumnStatsData {
+1: required i64 lowValue,
+2: required i64 highValue,
+3: required i64 numNulls,
+4: required i64 numDVs
+}
+
+struct StringColumnStatsData {
+1: required i64 maxColLen,
+2: required double avgColLen,
+3: required i64 numNulls,
+4: required i64 numDVs
+}
+
+struct BinaryColumnStatsData {
+1: required i64 maxColLen,
+2: required double avgColLen,
+3: required i64 numNulls
+}
+
+union ColumnStatisticsData {
+1: BooleanColumnStatsData booleanStats,
+2: LongColumnStatsData longStats,
+3: DoubleColumnStatsData doubleStats,
+4: StringColumnStatsData stringStats,
+5: BinaryColumnStatsData binaryStats
+}
+
+struct ColumnStatisticsObj {
+1: required string colName,
+2: required string colType,
+3: required ColumnStatisticsData statsData
+}
+
+struct ColumnStatisticsDesc {
+1: required bool isTblLevel,
+2: required string dbName,
+3: required string tableName,
+4: optional string partName,
+5: optional i64 lastAnalyzed
+}
+
+struct ColumnStatistics {
+1: required ColumnStatisticsDesc statsDesc,
+2: required list<ColumnStatisticsObj> statsObj;
+}
+
 // schema of the table/query results etc.
 struct Schema {
  // column names, types, comments
@@ -253,6 +314,10 @@ exception ConfigValSecurityException {
   1: string message
 }
 
+exception InvalidInputException {
+  1: string message
+}
+
 /**
 * This interface is live.
 */
@@ -472,6 +537,37 @@ service ThriftHiveMetastore extends fb30
   list<string> get_index_names(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1)
                        throws(1:MetaException o2)
 
+  // column statistics interfaces
+
+  // update APIs persist the column statistics object(s) that are passed in. If statistics already
+  // exist for one or more columns, the existing statistics will be overwritten. The update APIs
+  // validate that the dbName, tableName, partName, colName[] passed in as part of the ColumnStatistics
+  // struct are valid, and throw InvalidInputException/NoSuchObjectException if any is found to be invalid.
+  bool update_table_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
+              2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
+  bool update_partition_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
+              2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
+
+  // get APIs return the column statistics corresponding to db_name, tbl_name, [part_name], col_name if
+  // such statistics exist. If the required statistics don't exist, get APIs throw NoSuchObjectException.
+  // For instance, if get_table_column_statistics is called on a partitioned table for which only
+  // partition level column stats exist, get_table_column_statistics will throw NoSuchObjectException.
+  ColumnStatistics get_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidInputException o3, 4:InvalidObjectException o4)
+  ColumnStatistics get_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name,
+               4:string col_name) throws (1:NoSuchObjectException o1, 2:MetaException o2,
+               3:InvalidInputException o3, 4:InvalidObjectException o4)
+
+  // delete APIs attempt to delete column statistics, if found, associated with a given db_name, tbl_name, [part_name]
+  // and col_name. If a delete API doesn't find the statistics record in the metastore, it throws NoSuchObjectException.
+  // Delete APIs validate the input and, if the input is invalid, throw InvalidInputException/InvalidObjectException.
+  bool delete_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name, 4:string col_name) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
+               4:InvalidInputException o4)
+  bool delete_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
+               4:InvalidInputException o4)
+
   //authorization privileges
                        
   bool create_role(1:Role role) throws(1:MetaException o1)