Posted to issues@carbondata.apache.org by "xuzhiliang (JIRA)" <ji...@apache.org> on 2017/04/27 08:15:04 UTC

[jira] [Comment Edited] (CARBONDATA-999) use carbondata bucket feature, but it doesn't seem to work? anyone know it?

    [ https://issues.apache.org/jira/browse/CARBONDATA-999?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15986178#comment-15986178 ] 

xuzhiliang edited comment on CARBONDATA-999 at 4/27/17 8:14 AM:
----------------------------------------------------------------

#Mandatory. Carbon Store path
carbon.storelocation=hdfs://master.nascent.com:8020/Opt/CarbonStore
#Base directory for Data files
carbon.ddl.base.hdfs.url=hdfs://master.nascent.com:8020/opt/data
#Path where the bad records are stored
carbon.badRecords.location=/opt/Carbon/Spark/badrecords
#Mandatory. Path to Kettle home
carbon.kettle.home=/usr/lib/spark/carbonlib/carbonplugins

#File read buffer size used during sorting (in MB). MIN=1, MAX=100
carbon.sort.file.buffer.size=20
#Rowset size exchanged between data load graph steps. MIN=500, MAX=1000000
carbon.graph.rowset.size=10000
#Number of cores to be used while loading data
carbon.number.of.cores.while.loading=12
#Record count to sort and write to temporary intermediate files
carbon.sort.size=50000
#Algorithm for the hashmap used in hash key calculation
carbon.enableXXHash=true
#Number of cores to be used for block sort during data loading
#carbon.number.of.cores.block.sort=7
#Maximum level cache size up to which the level cache will be loaded into memory
#carbon.max.level.cache.size=-1
#Enable prefetch of data during merge sort while reading from sort temp files during data loading
#carbon.merge.sort.prefetch=true

#Number of cores to be used while compacting
carbon.number.of.cores.while.compacting=8
#For minor compaction: number of segments to be merged in stage 1, and number of stage-1 compacted segments to be merged in stage 2. With 4,3, every 4 segments are merged into one, and every 3 of those compacted segments are merged again.
carbon.compaction.level.threshold=4,3
#Size threshold (in MB) at which major compaction is triggered
carbon.major.compaction.size=1024

#Number of cores to be used for loading index into memory
carbon.number.of.cores=8
#Number of records to be kept in memory while querying. MIN=100000, MAX=240000
carbon.inmemory.record.size=120000
#Enables the quick filter, which improves filter query performance
carbon.enable.quick.filter=false
##Number of cores used to load blocks in the driver
#no.of.cores.to.load.blocks.in.driver=10


##Timestamp format of input data used for timestamp data type.
#carbon.timestamp.format=yyyy-MM-dd HH:mm:ss

##File write buffer size (in bytes) used during sorting.
#carbon.sort.file.write.buffer.size=10485760
##Locking mechanism for data loading on a table
carbon.lock.type=HDFSLOCK
##Minimum number of intermediate files after which the sort merge is started.
#carbon.sort.intermediate.files.limit=20
##Space reserved (as a percentage) for writing block metadata in the carbon data file
#carbon.block.meta.size.reserved.percentage=10
##CSV read buffer size (in bytes).
#carbon.csv.read.buffersize.byte=1048576
##To identify and apply compression for non-high-cardinality columns
#high.cardinality.value=100000
##Maximum number of threads used for reading intermediate files for final merging.
#carbon.merge.sort.reader.thread=3
##Carbon blocklet size. Note: this configuration cannot be changed once the store is generated
#carbon.blocklet.size=120000
##Number of retries to get the metadata lock for loading data into a table
#carbon.load.metadata.lock.retries=3
##Minimum blocklets needed for distribution.
#carbon.blockletdistribution.min.blocklet.size=10
##Interval (in seconds) between retries to get the lock
#carbon.load.metadata.lock.retry.timeout.sec=5
##Temporary store location; by default System.getProperty("java.io.tmpdir") is used
#carbon.tempstore.location=/opt/Carbon/TempStoreLoc
##Interval (in records) at which data loading progress is logged
#carbon.load.log.counter=500000
##Number of segments to be preserved from compaction
#carbon.numberof.preserve.segments=0
##Number of days within which loaded segments are considered for compaction
#carbon.allowed.compaction.days=0
##To enable compaction while data loading
#carbon.enable.auto.load.merge=false
##Maximum time (in minutes) allowed for one query to be executed.
max.query.execution.time=60
##Min/max pruning is a feature added to enhance query performance. To disable it, set this to false.
carbon.enableMinMax=true
##To enable/disable high-cardinality identification during the first data load
#high.cardinality.identify.enable=true
##Cardinality threshold above which a column is treated as high cardinality
#high.cardinality.threshold=1000000
##Percentage of the total row count above which a column's cardinality is treated as high
#high.cardinality.row.count.percentage=80
##Date to be considered as the start date for calculating timestamps.
#carbon.cutOffTimestamp=2000-01-01 00:00:00
##Granularity (i.e. SECOND, MINUTE, HOUR, or DAY) to which timestamps (in millis) are converted.
#carbon.timegranularity=SECOND
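
For reference, a minimal sketch (Scala, assuming the CarbonData 1.1 API) of overriding one of these properties at runtime through CarbonProperties instead of editing carbon.properties; the key/value pair is only an illustration:

import org.apache.carbondata.core.util.CarbonProperties

// Runtime override for the current JVM only; the carbon.properties file on
// disk is left untouched. Key and value here are illustrative.
CarbonProperties.getInstance().addProperty("carbon.badRecords.location", "/opt/Carbon/Spark/badrecords")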



was (Author: xuzhiliang2013):
This is my carbon.properties:

#
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

#################### System Configuration ##################
#Mandatory. Carbon Store path
carbon.storelocation=hdfs://master.nascent.com:8020/Opt/CarbonStore
#Base directory for Data files
carbon.ddl.base.hdfs.url=hdfs://master.nascent.com:8020/opt/data
#Path where the bad records are stored
carbon.badRecords.location=/opt/Carbon/Spark/badrecords
#Mandatory. Path to Kettle home
carbon.kettle.home=/usr/lib/spark/carbonlib/carbonplugins

#################### Performance Configuration ##################
######## DataLoading Configuration ########
#File read buffer size used during sorting (in MB). MIN=1, MAX=100
carbon.sort.file.buffer.size=20
#Rowset size exchanged between data load graph steps. MIN=500, MAX=1000000
carbon.graph.rowset.size=10000
#Number of cores to be used while loading data
carbon.number.of.cores.while.loading=12
#Record count to sort and write to temporary intermediate files
carbon.sort.size=50000
#Algorithm for the hashmap used in hash key calculation
carbon.enableXXHash=true
#Number of cores to be used for block sort during data loading
#carbon.number.of.cores.block.sort=7
#Maximum level cache size up to which the level cache will be loaded into memory
#carbon.max.level.cache.size=-1
#Enable prefetch of data during merge sort while reading from sort temp files during data loading
#carbon.merge.sort.prefetch=true
######## Compaction Configuration ########
#Number of cores to be used while compacting
carbon.number.of.cores.while.compacting=8
#For minor compaction: number of segments to be merged in stage 1, and number of stage-1 compacted segments to be merged in stage 2. With 4,3, every 4 segments are merged into one, and every 3 of those compacted segments are merged again.
carbon.compaction.level.threshold=4,3
#Size threshold (in MB) at which major compaction is triggered
carbon.major.compaction.size=1024
######## Query Configuration ########
#Number of cores to be used for loading index into memory
carbon.number.of.cores=8
#Number of records to be kept in memory while querying. MIN=100000, MAX=240000
carbon.inmemory.record.size=120000
#Enables the quick filter, which improves filter query performance
carbon.enable.quick.filter=false
##Number of cores used to load blocks in the driver
#no.of.cores.to.load.blocks.in.driver=10

#################### Extra Configuration ##################
##Timestamp format of input data used for timestamp data type.
#carbon.timestamp.format=yyyy-MM-dd HH:mm:ss
######## Dataload Configuration ########
##File write buffer size (in bytes) used during sorting.
#carbon.sort.file.write.buffer.size=10485760
##Locking mechanism for data loading on a table
carbon.lock.type=HDFSLOCK
##Minimum number of intermediate files after which the sort merge is started.
#carbon.sort.intermediate.files.limit=20
##Space reserved (as a percentage) for writing block metadata in the carbon data file
#carbon.block.meta.size.reserved.percentage=10
##CSV read buffer size (in bytes).
#carbon.csv.read.buffersize.byte=1048576
##To identify and apply compression for non-high-cardinality columns
#high.cardinality.value=100000
##Maximum number of threads used for reading intermediate files for final merging.
#carbon.merge.sort.reader.thread=3
##Carbon blocklet size. Note: this configuration cannot be changed once the store is generated
#carbon.blocklet.size=120000
##Number of retries to get the metadata lock for loading data into a table
#carbon.load.metadata.lock.retries=3
##Minimum blocklets needed for distribution.
#carbon.blockletdistribution.min.blocklet.size=10
##Interval (in seconds) between retries to get the lock
#carbon.load.metadata.lock.retry.timeout.sec=5
##Temporary store location; by default System.getProperty("java.io.tmpdir") is used
#carbon.tempstore.location=/opt/Carbon/TempStoreLoc
##Interval (in records) at which data loading progress is logged
#carbon.load.log.counter=500000
######## Compaction Configuration ########
##Number of segments to be preserved from compaction
#carbon.numberof.preserve.segments=0
##Number of days within which loaded segments are considered for compaction
#carbon.allowed.compaction.days=0
##To enable compaction while data loading
#carbon.enable.auto.load.merge=false
######## Query Configuration ########
##Maximum time (in minutes) allowed for one query to be executed.
max.query.execution.time=60
##Min/max pruning is a feature added to enhance query performance. To disable it, set this to false.
carbon.enableMinMax=true
######## Global Dictionary Configurations ########
##To enable/disable high-cardinality identification during the first data load
#high.cardinality.identify.enable=true
##Cardinality threshold above which a column is treated as high cardinality
#high.cardinality.threshold=1000000
##Percentage of the total row count above which a column's cardinality is treated as high
#high.cardinality.row.count.percentage=80
##Date to be considered as the start date for calculating timestamps.
#carbon.cutOffTimestamp=2000-01-01 00:00:00
##Granularity (i.e. SECOND, MINUTE, HOUR, or DAY) to which timestamps (in millis) are converted.
#carbon.timegranularity=SECOND
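
A minimal sketch (Scala, spark-shell style, assuming CarbonData 1.1 with Spark 1.6) of manually triggering the compaction configured above; sc is the shell's predefined SparkContext and the table name is the one from the issue below:

import org.apache.spark.sql.CarbonContext

// Store path matches carbon.storelocation above.
val cc = new CarbonContext(sc, "hdfs://master.nascent.com:8020/Opt/CarbonStore")

// Minor compaction honors carbon.compaction.level.threshold (4,3 above);
// major compaction honors carbon.major.compaction.size (1024 MB above).
cc.sql("ALTER TABLE kd_shop_test COMPACT 'MINOR'")
cc.sql("ALTER TABLE kd_shop_test COMPACT 'MAJOR'")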


> use carbondata bucket feature, but it doesn't seem to work? anyone know it?
> -------------------------------------------------------------------------
>
>                 Key: CARBONDATA-999
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-999
>             Project: CarbonData
>          Issue Type: Bug
>          Components: data-load
>    Affects Versions: 1.1.0
>         Environment: spark 1.6.2, carbondata 1.1.0 rc1
>            Reporter: xuzhiliang
>
> 1. CREATE TABLE kd_shop_test(platFormId int, sellerNick string, companyGuid STRING, companyName STRING) STORED BY 'carbondata' TBLPROPERTIES ('BUCKETNUMBER'='2','BUCKETCOLUMNS'='sellerNick')
> 2. When loading data,
> the sorter is of type ParallelReadMergeSorterImpl, not ParallelReadMergeSorterWithBucketingImpl. Why is configuration.getBucketingInfo null? What is wrong with that? Can you fix it? (See the sketch after the listing below for how the table was presumably created and loaded.)
> 3. hadoop dfs -lsr /Opt/CarbonStore/default/kd_shop_test
> drwxr-xr-x   - root supergroup          0 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Fact
> drwxr-xr-x   - root supergroup          0 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Fact/Part0
> drwxr-xr-x   - root supergroup          0 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Fact/Part0/Segment_0
> -rw-r--r--   3 root supergroup        566 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Fact/Part0/Segment_0/0_batchno0-0-1493278648826.carbonindex
> -rw-r--r--   3 root supergroup        891 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Fact/Part0/Segment_0/part-0-0_batchno0-0-1493278648826.carbondata
> drwxr-xr-x   - root supergroup          0 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata
> -rw-r--r--   3 root supergroup        211 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/472c32a7-db92-48d9-b2d8-c3b594a92f16.dict
> -rw-r--r--   3 root supergroup         12 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/472c32a7-db92-48d9-b2d8-c3b594a92f16.dictmeta
> -rw-r--r--   3 root supergroup         31 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/472c32a7-db92-48d9-b2d8-c3b594a92f16_211.sortindex
> -rw-r--r--   3 root supergroup         39 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/7010a4ff-5575-42b9-949c-6456d683da0c.dict
> -rw-r--r--   3 root supergroup         11 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/7010a4ff-5575-42b9-949c-6456d683da0c.dictmeta
> -rw-r--r--   3 root supergroup         11 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/7010a4ff-5575-42b9-949c-6456d683da0c_39.sortindex
> -rw-r--r--   3 root supergroup         73 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/bcdb18a6-ff24-4992-9d43-d39cb46c4034.dict
> -rw-r--r--   3 root supergroup         12 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/bcdb18a6-ff24-4992-9d43-d39cb46c4034.dictmeta
> -rw-r--r--   3 root supergroup         13 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/bcdb18a6-ff24-4992-9d43-d39cb46c4034_73.sortindex
> -rw-r--r--   3 root supergroup        532 2017-04-27 15:36 /Opt/CarbonStore/default/kd_shop_test/Metadata/schema
> -rw-r--r--   3 root supergroup        268 2017-04-27 15:37 /Opt/CarbonStore/default/kd_shop_test/Metadata/tablestatus
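>
> With 'BUCKETNUMBER'='2' one would expect the segment's data to be split across two bucket files, yet the listing shows a single part file, which is consistent with the non-bucketing sorter being chosen. A minimal sketch (Scala, spark-shell style, assuming CarbonData 1.1 with Spark 1.6) of how the table was presumably created and loaded; the CSV path is a hypothetical placeholder, since the actual LOAD statement was not included in the report:
>
> import org.apache.spark.sql.CarbonContext
>
> // Store path matches carbon.storelocation above; sc is the spark-shell SparkContext.
> val cc = new CarbonContext(sc, "hdfs://master.nascent.com:8020/Opt/CarbonStore")
>
> cc.sql("CREATE TABLE kd_shop_test(platFormId int, sellerNick string, " +
>        "companyGuid STRING, companyName STRING) STORED BY 'carbondata' " +
>        "TBLPROPERTIES ('BUCKETNUMBER'='2','BUCKETCOLUMNS'='sellerNick')")
>
> // Hypothetical input path under carbon.ddl.base.hdfs.url; the real one is unknown.
> cc.sql("LOAD DATA INPATH 'hdfs://master.nascent.com:8020/opt/data/kd_shop.csv' " +
>        "INTO TABLE kd_shop_test")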


