You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@tez.apache.org by "Soundararajan Velu (JIRA)" <ji...@apache.org> on 2015/07/15 18:39:05 UTC

[jira] [Created] (TEZ-2622) Tez gives different responses when run on Physical tables and logical views

Soundararajan Velu created TEZ-2622:
---------------------------------------

             Summary: Tez gives different responses when run on Physical tables and logical views
                 Key: TEZ-2622
                 URL: https://issues.apache.org/jira/browse/TEZ-2622
             Project: Apache Tez
          Issue Type: Bug
    Affects Versions: 0.7.0
         Environment: Hive 1.2.0 and Tez 0.7.0,
            Reporter: Soundararajan Velu
            Priority: Critical


The same query yields different results depending on whether it is run against a logical view or against the underlying physical table: the query on the view returns no records, or only a few of the expected records.

CBO is turned on, and the following flags are used:


set hive.cli.print.current.db=true;
set hive.cli.print.header=true;

set hive.execution.engine=tez;
set mapreduce.job.queuename=admin;
set tez.queue.name=admin;

set hive.tez.container.size=5096;
set tez.task.resource.memory.mb=5096;

set hive.auto.convert.join=true;

set hive.auto.convert.sortmerge.join.to.mapjoin=true;
set hive.auto.convert.sortmerge.join=true;

set hive.enforce.bucketmapjoin=true;
set hive.enforce.bucketing=true;
set hive.enforce.sorting=true;
set hive.enforce.sortmergebucketmapjoin=true;

set hive.optimize.bucketmapjoin.sortedmerge=true; 
set hive.optimize.skewjoin=true;
set hive.optimize.skewjoin.compiletime=true;

set hive.groupby.skewindata=true;
set hive.convert.join.bucket.mapjoin.tez=true;
set hive.exec.parallel=true;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.groupby.maxentries=10240;
set hive.vectorized.groupby.flush.percent=0.1;
set hive.tez.auto.reducer.parallelism=true;
set hive.tez.min.partition.factor=50;
set hive.tez.max.partition.factor=100;
set io.sort.mb=400;
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

set hive.hashtable.initialCapacity=1000000;
set hive.hashtable.key.count.adjustment=1.0;
set hive.hashtable.loadfactor=0.99;

set tez.runtime.io.sort.mb=1800;
set tez.runtime.sort.threads=4;
set tez.runtime.io.sort.factor=200;
set tez.runtime.shuffle.memory-to-memory.enable=false;
set tez.runtime.shuffle.memory-to-memory.segments=4;
set tez.runtime.pipelined-shuffle.enable=true;
set tez.runtime.optimize.shared.fetch=true;
set tez.runtime.shuffle.keep-alive.enabled=true;
set tez.runtime.optimize.local.fetch=false;

set hive.exec.reducers.max=300;

set hive.mapjoin.hybridgrace.hashtable=true;
set hive.mapjoin.hybridgrace.memcheckfrequency=1024;
set hive.mapjoin.optimized.hashtable=true;
set hive.mapjoin.optimized.hashtable.wbsize=880000;
set hive.mapjoin.localtask.max.memory.usage=0.99;

set hive.optimize.skewjoin.compiletime=false;
set hive.skewjoin.key=10000000;
set hive.skewjoin.mapjoin.map.tasks=200;
set hive.skewjoin.mapjoin.min.split=134217728;


set hive.compute.query.using.stats=true;



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)