You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@iotdb.apache.org by "刘珍 (Jira)" <ji...@apache.org> on 2023/05/29 07:35:00 UTC

[jira] [Created] (IOTDB-5931) The "show cluster" command displays nodes with "Unknown" status, but these nodes can still perform read and write operations normally.

刘珍 created IOTDB-5931:
-------------------------

             Summary: The "show cluster" command displays nodes with "Unknown" status, but these nodes can still perform read and write operations normally.
                 Key: IOTDB-5931
                 URL: https://issues.apache.org/jira/browse/IOTDB-5931
             Project: Apache IoTDB
          Issue Type: Bug
          Components: Core/Cluster, mpp-cluster
            Reporter: 刘珍
            Assignee: Xinyu Tan
         Attachments: image-2023-05-29-15-30-45-953.png, image-2023-05-29-15-31-02-797.png

测试版本:iotdb master 0524_12d67e0
问题1 :
3副本3C21D集群,长时间循环运行 load tsfile ; delete 所有数据;show cluster 21D显示状态为Unknown,但是客户端仍然可以继续正常读写。
 !image-2023-05-29-15-30-45-953.png! 
问题2:不同datanode show cluster 结果不同
 !image-2023-05-29-15-31-02-797.png! 
测试环境 ,私有云1期,172.16.2.2 - 25
1. 配置参数
COMMON配置
schema_region_group_extension_policy=CUSTOM
default_schema_region_group_num_per_database=10
data_region_group_extension_policy=CUSTOM
default_data_region_group_num_per_database=42
min_cross_compaction_unseq_file_level=0
schema_replication_factor=3
data_replication_factor=3
default_storage_group_level=2
compaction_write_throughput_mb_per_sec=64

confignode
MAX_HEAP_SIZE="20G"
MAX_DIRECT_MEMORY_SIZE="6G"
cn_target_config_node_list=172.16.2.23:10710

DATANODE:
MAX_HEAP_SIZE="20G"
MAX_DIRECT_MEMORY_SIZE="6G"
dn_target_config_node_list=172.16.2.23:10710,172.16.2.24:10710,172.16.2.25:10710

2. 客户端测试脚本在172.16.2.2
/data1/iotdb/i_m_0524_12d67e0路径下
cat load_insert_drop_db_1.sh
# load_insert_drop_db_1.sh
#
# One round of the load / query / delete cycle against the cluster:
#   1. delete everything under root.test.g_0
#   2. load three TsFile directories, then flush
#   3. count s_0 per device and compare the result with ${db_dir}/exp.out
#   4. as soon as one node listed in ./dn.txt has a compaction log, delete the
#      data again and record any series that still report a non-zero count
#   5. after a pause, record every node that still has a compaction log
#
# Inputs:  ./dn.txt (one host per line), ${db_dir}/exp.out
# Outputs: act.out, query_res.out, aft_load_query_res.out, del_data_q.out,
#          not_expect_res.out -- all relative to the current directory.

v_host="172.16.2.2"
cluster_dir="/data1/iotdb"
db_commit="i_m_0524_12d67e0"
db_dir="${cluster_dir}/${db_commit}"
u_name="root"
cli="${db_dir}/sbin/start-cli.sh"

# Print the number of files matching *compaction.log under ${db_dir}/data/ on
# the given host.
# Arguments: $1 - host to ssh into as ${u_name}
# Outputs:   the count on stdout (empty if ssh itself fails)
count_compaction_logs() {
  # The pattern is single-quoted inside the remote command so that the remote
  # shell hands it to find verbatim instead of expanding it against the login
  # directory first.
  ssh "${u_name}@$1" "find ${db_dir}/data/ -name '*compaction.log' | wc -l"
}

"${cli}" -h "${v_host}" -e "delete from  root.test.g_0.**;"
"${cli}" -h "${v_host}" -e 'load "/data/iotdb/load_tsfile/load_tsfile_level_1/"  verify=false sglevel=2 onSuccess=none'
"${cli}" -h "${v_host}" -e 'load "/data/iotdb/load_tsfile/load_tsfile_1"  verify=false sglevel=2 onSuccess=none'
"${cli}" -h "${v_host}" -e 'load "/data/iotdb/load_tsfile/load_tsfile_2"  verify=false sglevel=2 onSuccess=none'
"${cli}" -h "${v_host}" -e "flush"
# NOTE(review): act.out is written relative to the current directory but read
# back from ${db_dir} below, so the comparison is only meaningful when the
# script is started from ${db_dir} -- confirm.
"${cli}" -h "${v_host}" -e "select count(s_0) from root.test.g_0.** align by device;" >act.out

# Only diff lines that mention "root" count as mismatches.
v_diff=$(diff "${db_dir}/exp.out" "${db_dir}/act.out" | grep -c root)

if [[ "${v_diff}" -eq 0 ]]; then
  echo "query pass." >> query_res.out
else
  v_date=$(date "+%Y-%m-%d_%H_%M_%S")
  echo "${v_date} query fail." >> aft_load_query_res.out
fi

# The node list is read on fd 3 rather than stdin (presumably so that ssh
# cannot swallow the rest of the list); the descriptor is scoped to the loop
# and closed automatically when the loop ends or breaks.
while read -r node <&3; do
  [[ -n "${node}" ]] || continue   # ignore blank lines in dn.txt
  v_comp=$(count_compaction_logs "${node}")
  if [[ "${v_comp:-0}" -gt 0 ]]; then
    sleep 2
    "${cli}" -h "${v_host}" -e "delete from  root.test.g_0.**;"
    "${cli}" -h "${v_host}" -e "select count(s_0) from  root.test.g_0.** having count(s_0)>0;" >> del_data_q.out
    break   # one delete per run is enough; stop at the first compacting node
  fi
done 3<./dn.txt

sleep 10

# NOTE(review): the three passes run back to back with no delay between them
# -- confirm whether a sleep between passes was intended.
for _ in {1..3}; do
  while read -r node <&3; do
    [[ -n "${node}" ]] || continue
    v_comp=$(count_compaction_logs "${node}")
    if [[ "${v_comp:-0}" -gt 0 ]]; then
      echo "${node} after delete from root.test.g_0 still compacting." >> not_expect_res.out
    fi
  done 3<./dn.txt
done




--
This message was sent by Atlassian Jira
(v8.20.10#820010)