You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@tez.apache.org by "Soundararajan Velu (JIRA)" <ji...@apache.org> on 2015/07/15 18:39:05 UTC
[jira] [Created] (TEZ-2622) Tez gives different responses when run
on Physical tables and logical views
Soundararajan Velu created TEZ-2622:
---------------------------------------
Summary: Tez gives different responses when run on Physical tables and logical views
Key: TEZ-2622
URL: https://issues.apache.org/jira/browse/TEZ-2622
Project: Apache Tez
Issue Type: Bug
Affects Versions: 0.7.0
Environment: Hive 1.2.0 and Tez 0.7.0
Reporter: Soundararajan Velu
Priority: Critical
The same query yields different results when run against a logical view versus the underlying physical table: the query on the view returns no records, or only a few.
CBO is turned on, and the following flags are used:
set hive.cli.print.current.db=true;
set hive.cli.print.header=true;
set hive.execution.engine=tez;
set mapreduce.job.queuename=admin;
set tez.queue.name=admin;
set hive.tez.container.size=5096;
set tez.task.resource.memory.mb=5096;
set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join.to.mapjoin=true;
set hive.auto.convert.sortmerge.join=true;
set hive.enforce.bucketmapjoin=true;
set hive.enforce.bucketing=true;
set hive.enforce.sorting=true;
set hive.enforce.sortmergebucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;
set hive.optimize.skewjoin=true;
set hive.optimize.skewjoin.compiletime=true;
set hive.groupby.skewindata=true;
set hive.convert.join.bucket.mapjoin.tez=true;
set hive.exec.parallel=true;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.groupby.maxentries=10240;
set hive.vectorized.groupby.flush.percent=0.1;
set hive.tez.auto.reducer.parallelism=true;
set hive.tez.min.partition.factor=50;
set hive.tez.max.partition.factor=100;
set io.sort.mb=400;
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.hashtable.initialCapacity=1000000;
set hive.hashtable.key.count.adjustment=1.0;
set hive.hashtable.loadfactor=0.99;
set tez.runtime.io.sort.mb=1800;
set tez.runtime.sort.threads=4;
set tez.runtime.io.sort.factor=200;
set tez.runtime.shuffle.memory-to-memory.enable=false;
set tez.runtime.shuffle.memory-to-memory.segments=4;
set tez.runtime.pipelined-shuffle.enable=true;
set tez.runtime.optimize.shared.fetch=true;
set tez.runtime.shuffle.keep-alive.enabled=true;
set tez.runtime.optimize.local.fetch=false;
set hive.exec.reducers.max=300;
set hive.mapjoin.hybridgrace.hashtable=true;
set hive.mapjoin.hybridgrace.memcheckfrequency=1024;
set hive.mapjoin.optimized.hashtable=true;
set hive.mapjoin.optimized.hashtable.wbsize=880000;
set hive.mapjoin.localtask.max.memory.usage=0.99;
set hive.optimize.skewjoin.compiletime=false;
set hive.skewjoin.key=10000000;
set hive.skewjoin.mapjoin.map.tasks=200;
set hive.skewjoin.mapjoin.min.split=134217728;
set hive.compute.query.using.stats=true;
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)