You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2020/08/18 07:37:26 UTC

[incubator-doris] branch master updated: Update support batch delete storage design document (#4234)

This is an automated email from the ASF dual-hosted git repository.

yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8a3eaee  Update support batch delete storage design document  (#4234)
8a3eaee is described below

commit 8a3eaeecf12628d72342ed9d6e62da90092de7e6
Author: ZhangYu0123 <67...@users.noreply.github.com>
AuthorDate: Tue Aug 18 15:37:14 2020 +0800

    Update support batch delete storage design document  (#4234)
    
    * Update delete index design document
---
 docs/en/internal/doris_storage_optimization.md    | 78 ++++++++++++-----------
 docs/zh-CN/internal/doris_storage_optimization.md | 38 +++++------
 2 files changed, 60 insertions(+), 56 deletions(-)

diff --git a/docs/en/internal/doris_storage_optimization.md b/docs/en/internal/doris_storage_optimization.md
index 529b85d..6ceccad 100644
--- a/docs/en/internal/doris_storage_optimization.md
+++ b/docs/en/internal/doris_storage_optimization.md
@@ -36,7 +36,7 @@ Documents include:
 - The file starts with an 8-byte magic code to identify the file format and version
 - Data Region: Used to store data information for each column, where the data is loaded on demand by pages.
 - Index Region: Doris stores the index data of each column in Index Region, where the data is loaded according to column granularity, so the data information of the following column is stored separately.
-- Footer信息
+- Footer
 	- FileFooterPB: Metadata Information for Definition Files
 	- 4-byte checksum of the footer PB content
 	- Four bytes FileFooterPB message length for reading FileFooterPB
@@ -116,27 +116,29 @@ We generate a sparse index of short key every N rows (configurable) with the con
 The format design supports the subsequent expansion of other index information, such as bitmap index, spatial index, etc. It only needs to write the required data to the existing column data, and add the corresponding metadata fields to FileFooterPB.
 
 ### Metadata Definition ###
-FileFooterPB is defined as:
+SegmentFooterPB is defined as:
 
 ```
 message ColumnPB {
-    optional uint32 column_id = 1; // 这里使用column id,不使用column name是因为计划支持修改列名
-    optional string type = 2; // 列类型
-    optional string aggregation = 3; // 是否聚合
-    optional uint32 length = 4; // 长度
-    optional bool is_key = 5; // 是否是主键列
-    optional string default_value = 6; // 默认值
-    optional uint32 precision = 9 [default = 27]; // 精度
-    optional uint32 frac = 10 [default = 9];
-    optional bool is_nullable = 11 [default=false]; // 是否有null
-    optional bool is_bf_column = 15 [default=false]; // 是否有bf词典
-	optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引
+    required int32 unique_id = 1;   // The column id is used here instead of the column name, because renaming columns is planned to be supported
+    optional string name = 2;   // Column name,  when name equals __DORIS_DELETE_SIGN__, this column is a hidden delete column
+    required string type = 3;   // Column type
+    optional bool is_key = 4;   // Whether column is a primary key column
+    optional string aggregation = 5;    // Aggregate type
+    optional bool is_nullable = 6;      // Whether column is allowed to be assigned null
+    optional bytes default_value = 7;   // Default value
+    optional int32 precision = 8;       // Precision of column
+    optional int32 frac = 9;
+    optional int32 length = 10;         // Length of column
+    optional int32 index_length = 11;   // Length of column index
+    optional bool is_bf_column = 12;    // Whether column has bloom filter index
+    optional bool has_bitmap_index = 15 [default=false];  // Whether column has bitmap index
 }
 
-// page偏移
+// page offset
 message PagePointerPB {
-	required uint64 offset; // page在文件中的偏移
-	required uint32 length; // page的大小
+	required uint64 offset; // offset of page in segment file
+	required uint32 length; // length of page
 }
 
 message MetadataPairPB {
@@ -145,36 +147,36 @@ message MetadataPairPB {
 }
 
 message ColumnMetaPB {
-	optional ColumnMessage encoding; // 编码方式
+	optional ColumnMessage encoding; // Encoding of column
 
-	optional PagePointerPB dict_page // 词典page
-	repeated PagePointerPB bloom_filter_pages; // bloom filter词典信息
-	optional PagePointerPB ordinal_index_page; // 行号索引数据
-	optional PagePointerPB page_zone_map_page; // page级别统计信息索引数据
+	optional PagePointerPB dict_page // Dictionary page
+	repeated PagePointerPB bloom_filter_pages; // Bloom filter pages
+	optional PagePointerPB ordinal_index_page; // Ordinal index page
+	optional PagePointerPB page_zone_map_page; // Page level of statistics index data
 
-	optional PagePointerPB bitmap_index_page; // bitmap索引数据
+	optional PagePointerPB bitmap_index_page; // Bitmap index page
 
-	optional uint64 data_footprint; // 列中索引的大小
-	optional uint64 index_footprint; // 列中数据的大小
-	optional uint64 raw_data_footprint; // 原始列数据大小
+	optional uint64 data_footprint; // The size of the data in the column
+	optional uint64 index_footprint; // The size of the index in the column
+	optional uint64 raw_data_footprint; // Original column data size
 
-	optional CompressKind compress_kind; // 列的压缩方式
+	optional CompressKind compress_kind; // Column compression type
 
-	optional ZoneMapPB column_zone_map; //文件级别的过滤条件
+	optional ZoneMapPB column_zone_map; // Segment level of statistics index data
 	repeated MetadataPairPB column_meta_datas;
 }
 
-message FileFooterPB {
-	optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
-	repeated ColumnPB schema = 5; // 列Schema
-    optional uint64 num_values = 4; // 文件中保存的行数
-    optional uint64 index_footprint = 7; // 索引大小
-    optional uint64 data_footprint = 8; // 数据大小
-	optional uint64 raw_data_footprint = 8; // 原始数据大小
-
-    optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
-    repeated ColumnMetaPB column_metas = 10; // 列元数据
-	optional PagePointerPB key_index_page; // short key索引page
+message SegmentFooterPB {
+	optional uint32 version = 2 [default = 1]; // For version compatibility and upgrade use
+	repeated ColumnPB schema = 5; // Schema of columns
+  optional uint64 num_values = 4; // Number of rows saved in the file
+  optional uint64 index_footprint = 7; // Index size
+  optional uint64 data_footprint = 8; // Data size
+	optional uint64 raw_data_footprint = 8; // Original data size
+
+  optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // Compression type
+  repeated ColumnMetaPB column_metas = 10; // Column metadata
+	optional PagePointerPB key_index_page = 11; // short key index page
 }
 
 ```
diff --git a/docs/zh-CN/internal/doris_storage_optimization.md b/docs/zh-CN/internal/doris_storage_optimization.md
index 3d65388..f281649 100644
--- a/docs/zh-CN/internal/doris_storage_optimization.md
+++ b/docs/zh-CN/internal/doris_storage_optimization.md
@@ -115,21 +115,23 @@ non-nullable data page结构如下:
 该格式设计支持后续扩展其他的索引信息,比如bitmap索引,spatial索引等等,只需要将需要的数据写到现有的列数据后面,并且添加对应的元数据字段到FileFooterPB中
 
 ### 元数据定义 ###
-FileFooterPB的定义为:
+SegmentFooterPB的定义为:
 
 ```
 message ColumnPB {
-    optional uint32 column_id = 1; // 这里使用column id,不使用column name是因为计划支持修改列名
-    optional string type = 2; // 列类型
-    optional string aggregation = 3; // 是否聚合
-    optional uint32 length = 4; // 长度
-    optional bool is_key = 5; // 是否是主键列
-    optional string default_value = 6; // 默认值
-    optional uint32 precision = 9 [default = 27]; // 精度
-    optional uint32 frac = 10 [default = 9];
-    optional bool is_nullable = 11 [default=false]; // 是否有null
-    optional bool is_bf_column = 15 [default=false]; // 是否有bf词典
-	optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引
+    required int32 unique_id = 1;   // 这里使用column id, 不使用column name是因为计划支持修改列名
+    optional string name = 2;   // 列的名字,  当name为__DORIS_DELETE_SIGN__, 表示该列为隐藏的删除列
+    required string type = 3;   // 列类型
+    optional bool is_key = 4;   // 是否是主键列
+    optional string aggregation = 5;    // 聚合方式
+    optional bool is_nullable = 6;      // 是否有null
+    optional bytes default_value = 7;   // 默认值
+    optional int32 precision = 8;       // 精度
+    optional int32 frac = 9;
+    optional int32 length = 10;         // 长度
+    optional int32 index_length = 11;   // 索引长度
+    optional bool is_bf_column = 12;    // 是否有bf词典
+    optional bool has_bitmap_index = 15 [default=false];  // 是否有bitmap索引
 }
 
 // page偏移
@@ -163,16 +165,16 @@ message ColumnMetaPB {
 	repeated MetadataPairPB column_meta_datas;
 }
 
-message FileFooterPB {
+message SegmentFooterPB {
 	optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
 	repeated ColumnPB schema = 5; // 列Schema
-    optional uint64 num_values = 4; // 文件中保存的行数
-    optional uint64 index_footprint = 7; // 索引大小
-    optional uint64 data_footprint = 8; // 数据大小
+  optional uint64 num_values = 4; // 文件中保存的行数
+  optional uint64 index_footprint = 7; // 索引大小
+  optional uint64 data_footprint = 8; // 数据大小
 	optional uint64 raw_data_footprint = 8; // 原始数据大小
 
-    optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
-    repeated ColumnMetaPB column_metas = 10; // 列元数据
+  optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
+  repeated ColumnMetaPB column_metas = 10; // 列元数据
 	optional PagePointerPB key_index_page; // short key索引page
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org