You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ji...@apache.org on 2021/11/09 10:00:23 UTC
[incubator-doris] branch master updated: Chinese annotation modification (#6958)

This is an automated email from the ASF dual-hosted git repository.

jiafengzheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 088a16d  Chinese annotation modification  (#6958)
088a16d is described below

commit 088a16d33b78e4978f6ff0a8f0028d8aba49da3b
Author: jiafeng.zhang <zh...@gmail.com>
AuthorDate: Tue Nov 9 18:00:14 2021 +0800

    Chinese annotation modification  (#6958)
    
    * Modify Chinese comment (#6951)
---
 be/src/exec/olap_scan_node.cpp                   |  16 +--
 be/src/olap/rowset/bit_field_writer.h            |   2 +-
 be/src/olap/rowset/column_reader.h               | 139 +++++++++++------------
 be/src/olap/rowset/column_writer.cpp             |  34 +++---
 be/src/olap/rowset/column_writer.h               |  40 +++----
 be/src/olap/rowset/run_length_integer_reader.cpp |   2 +-
 be/src/olap/rowset/segment_reader.cpp            |  32 +++---
 be/src/olap/rowset/segment_reader.h              | 124 ++++++++++----------
 be/src/olap/serialize.h                          |  53 ++++-----
 be/src/olap/storage_engine.h                     |  10 +-
 be/src/olap/stream_index_common.h                |  35 +++---
 be/test/exec/csv_scan_node_test.cpp              |   8 +-
 be/test/exec/parquet_scanner_test.cpp            |  10 +-
 13 files changed, 254 insertions(+), 251 deletions(-)

diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index 746e553..9157e1b 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -1337,14 +1337,14 @@ void OlapScanNode::transfer_thread(RuntimeState* state) {
     ThreadPoolToken* thread_token = state->get_query_fragments_ctx()->get_token();
 
     /*********************************
-     * 优先级调度基本策略:
-     * 1. 通过查询拆分的Range个数来确定初始nice值
-     *    Range个数越多，越倾向于认定为大查询，nice值越小
-     * 2. 通过查询累计读取的数据量来调整nice值
-     *    读取的数据越多，越倾向于认定为大查询，nice值越小
-     * 3. 通过nice值来判断查询的优先级
-     *    nice值越大的，越优先获得的查询资源
-     * 4. 定期提高队列内残留任务的优先级，避免大查询完全饿死
+     * The basic strategy of priority scheduling:
+     * 1. Determine the initial nice value from the number of ranges the query is split into
+     *    The more ranges there are, the more likely the query is a large one, and the smaller the nice value
+     * 2. Adjust the nice value according to the amount of data the query has read so far
+     *    The more data read, the more likely the query is a large one, and the smaller the nice value
+     * 3. Determine the priority of the query from the nice value
+     *    The larger the nice value, the higher the priority for obtaining query resources
+     * 4. Periodically raise the priority of tasks remaining in the queue so that large queries are not starved
      *********************************/
     PriorityThreadPool* thread_pool = state->exec_env()->scan_thread_pool();
     _total_assign_num = 0;
diff --git a/be/src/olap/rowset/bit_field_writer.h b/be/src/olap/rowset/bit_field_writer.h
index 32a40e4..6812833 100644
--- a/be/src/olap/rowset/bit_field_writer.h
+++ b/be/src/olap/rowset/bit_field_writer.h
@@ -31,7 +31,7 @@ public:
     explicit BitFieldWriter(OutStream* output);
     ~BitFieldWriter();
     OLAPStatus init();
-    // 写入一个bit, bit_value为true表示写入1, false表示写入0
+    // Write a bit, bit_value is true means writing 1, false means writing 0
     OLAPStatus write(bool bit_value);
     OLAPStatus flush();
     void get_position(PositionEntryWriter* index_entry) const;
diff --git a/be/src/olap/rowset/column_reader.h b/be/src/olap/rowset/column_reader.h
index f0eeffd..a57a3eb 100644
--- a/be/src/olap/rowset/column_reader.h
+++ b/be/src/olap/rowset/column_reader.h
@@ -38,7 +38,7 @@ class BitFieldReader;
 class RowIndexEntryMessage;
 class ColumnEncodingMessage;
 
-// 解出流
+// Extract the stream
 inline ReadOnlyFileStream* extract_stream(uint32_t column_unique_id, StreamInfoMessage::Kind kind,
                                           std::map<StreamName, ReadOnlyFileStream*>* streams) {
     StreamName stream_name(column_unique_id, kind);
@@ -56,34 +56,34 @@ typedef std::unordered_map<uint32_t, PositionProvider> UniqueIdPositionProviderM
 // Unique id -> ColumnEncodingMessage
 typedef std::map<uint32_t, ColumnEncodingMessage> UniqueIdEncodingMap;
 
-// Integer和String的读取器。
-// 这些读取器虽然也冠有Reader之名，但注意并不从ColumnReader继承
-// 因此不考虑空值的情况。
+// Readers for Integer and String.
+// Although these readers are also named Reader, note that they do not inherit from ColumnReader
+// Therefore, the case of null values is not considered.
 
-// 对于SHORT/INT/LONG类型的数据，统一使用int64作为存储的数据
-// 由于使用变长编码，所以不会造成浪费
-// IntegerColumnReader是读取底层int64数据的reader，上层返回时
-// 使用IntColumnReaderWrapper转为具体的数据类型
+// For SHORT/INT/LONG type data, use int64 as the stored data uniformly
+// Due to the use of variable length coding, it will not cause waste
+// IntegerColumnReader is a reader that reads the int64 data of the bottom layer, when the upper layer returns
+// Use IntColumnReaderWrapper to convert to a specific data type
 //
-// NOTE. 由于RLE读取器只能读int64，这点和java不同，java整形是不考虑符号的
-// 那么这东西实际上似乎是�
-// ��法支持无符号整形的，需要注意后续是否修改RLEReader
+// NOTE. The RLE reader can only read int64. This differs from Java, where integer types do not
+// distinguish signedness, so this reader actually seems to be unable to support
+// unsigned integers; pay attention to whether RLEReader needs to be modified in the future
 class IntegerColumnReader {
 public:
     IntegerColumnReader(uint32_t column_unique_id);
     ~IntegerColumnReader();
     /**
-     * 初始化Integer列读取器
-     * @param  streams 包含所需要流的map
-     * @param  is_sign 所读取的数是否有符号
-     * @return         [description]
-     */
+      * Initialize the Integer column reader
+      * @param streams contains the map of the required stream
+      * @param is_sign whether the number read has a sign
+      * @return [description]
+      */
     OLAPStatus init(std::map<StreamName, ReadOnlyFileStream*>* streams, bool is_sign);
-    // 将内部指针定位到positions
+    // Position the internal pointer to positions
     OLAPStatus seek(PositionProvider* positions);
-    // 将内部指针向后移动row_count行
+    // Advance the internal pointer by row_count rows
     OLAPStatus skip(uint64_t row_count);
-    // 返回当前行的数据，通过将内部指针移向下一行
+    // Return the data of the current row by moving the internal pointer to the next row
     OLAPStatus next(int64_t* value);
     bool eof() { return _eof; }
 
@@ -93,8 +93,8 @@ private:
     RunLengthIntegerReader* _data_reader;
 };
 
-// 对于使用Direct方式编码的字符串列的读取器
-// Direct方式的String直接读取即可
+// For readers of string columns encoded in Direct mode
+// Direct method of String can be read directly
 class StringColumnDirectReader {
 public:
     StringColumnDirectReader(uint32_t column_unique_id, uint32_t dictionary_size);
@@ -104,9 +104,9 @@ public:
                     MemPool* mem_pool);
     OLAPStatus seek(PositionProvider* positions);
     OLAPStatus skip(uint64_t row_count);
-    // 返回当前行的数据，并将内部指针向后移动
-    // buffer - 返回数据的缓冲区
-    // length - 输入时作为缓存区大小，返回时给出字符串的大小
+    // Return the data of the current row and advance the internal pointer
+    // buffer - the buffer of the returned data
+    // length - the size of the buffer area when input, and the size of the string when returning
     OLAPStatus next(char* buffer, uint32_t* length);
     OLAPStatus next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool,
                            int64_t* read_bytes);
@@ -121,14 +121,14 @@ private:
     RunLengthIntegerReader* _length_reader;
 };
 
-// 对于使用字典编码的字符串列的读取器
-// 接口同StringColumnDirectReader
-// 读取的流程：
-// 1. 读取全部的字典数据，保存在整块buffer中
-// 2. 读取length数据，构造偏移字典，偏移是�
-// ��每个string的起始，与1组合能够读取数据
-// 3. 需要时读取实际保存的数据�
-// ��是一个int）。根据这个int找出偏移，再根据偏移读出字典数据
+// For readers using dictionary-encoded string columns
+// The interface is the same as StringColumnDirectReader
+// Reading process:
+// 1. Read all the dictionary data and save it in the whole buffer
+// 2. Read the length data and construct the offset dictionary; each offset is
+// the start of a string, and combined with step 1 it can be used to read the data
+// 3. When needed, read the actually stored data
+// (which is an int). Find the offset according to this int, and then read the dictionary data according to the offset
 class StringColumnDictionaryReader {
 public:
     StringColumnDictionaryReader(uint32_t column_unique_id, uint32_t dictionary_size);
@@ -150,27 +150,28 @@ private:
     Slice* _values;
     char* _read_buffer;
     //uint64_t _dictionary_size;
-    //uint64_t* _offset_dictionary;   // 用来查找响应数据的数字对应的offset
-    //StorageByteBuffer* _dictionary_data_buffer;   // 保存dict数据
+    //uint64_t* _offset_dictionary;   // The offset corresponding to the number used to find the response data
+    //StorageByteBuffer* _dictionary_data_buffer;   // Save dict data
     std::vector<std::string> _dictionary;
-    RunLengthIntegerReader* _data_reader; // 用来读实际的数据（用一个integer表示）
+    // Used to read the actual data (represented by an integer)
+    RunLengthIntegerReader* _data_reader; 
 };
 
-// ColumnReader用于读取一个列, 是其他XXXColumnReader的基类
-// ColumnReader通过present的bit field维护了列的NULL特性
+// ColumnReader is used to read a column and is the base class of other XXXColumnReader
+// ColumnReader maintains the NULL property of the column through the `present` bit field
 class ColumnReader {
 public:
-    // 工厂方法, 创建ColumnReader, 如果列有子列, 递归创建sub reader
-    // 如果需要读取的列在segment_columns中不存在, 则:
-    //     1. 如果列允许Null值, 则创建一个NullValueReader
-    //     2. 如果列不允许Null值, 但有默认值, 则创建一个DefaultValueReader
-    //     3. 否则创建失败
+    // Factory method, create ColumnReader, if the column has sub-columns, recursively create a sub reader
+    // If the column to be read does not exist in segment_columns, then:
+    //       1.If the column allows Null values, create a NullValueReader
+    //       2.If the column does not allow Null values, but has a default value, create a DefaultValueReader
+    //       3.Otherwise the creation fails
     // Input:
-    //     column_id - 需要创建的列在columns中的位置
-    //     columns - 表的schema
-    //     included - 需要创建的列, 如果某列的unique id在included中则创建
-    //     segment_columns - segment中所有column的unique id组成的集合
-    //     encodings - 列的编码信息, 使用encodings[_column_unique_id]访问
+    //       column_id - the position of the column to be created in the columns
+    //       columns - the schema of the table
+    //       included - the columns to be created; a column is created if its unique id is in included
+    //       segment_columns - a collection of unique ids of all columns in the segment
+    //       encodings - column encoding information, use encodings[_column_unique_id] to access
     static ColumnReader* create(uint32_t column_id, const TabletSchema& schema,
                                 const UniqueIdToColumnIdMap& included,
                                 UniqueIdToColumnIdMap& segment_included,
@@ -184,20 +185,18 @@ public:
     ColumnReader(uint32_t column_id, uint32_t column_unique_id);
     virtual ~ColumnReader();
 
-    // 使用streams初始化Reader
-    // ColumnReader仅初始化一次，每次使用时分配新的对象。
+    // Use streams to initialize Reader
+    // ColumnReader is initialized only once, and a new object is allocated each time it is used.
     // Input:
-    //     streams - 输入stream
+    //       streams - input streams
     virtual OLAPStatus init(std::map<StreamName, ReadOnlyFileStream*>* streams, int size,
                             MemPool* mem_pool, OlapReaderStatistics* stats);
 
-    // 设置下一个返回的数据的位置
-    // positions是各个列需要seek的位置, ColumnReader通过(*positions)[_column_unique_id]
-    // 获得本列需要seek的位置
+    // Set the position of the next returned data
+    // positions are the positions where each column needs to seek, ColumnReader passes (*positions)[_column_unique_id]
+    // Get the seek position of this column
     virtual OLAPStatus seek(PositionProvider* positions);
 
-    // TODO. 这点不是很明白，为什么present不用skip，
-    // 如果上层skip过而底层不skip，next判断空不空不是不准了吗
     virtual OLAPStatus skip(uint64_t row_count);
 
     virtual OLAPStatus next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool);
@@ -209,16 +208,16 @@ public:
     virtual size_t get_buffer_size() { return 0; }
 
 protected:
-    // NOTE. 统计rows中的非空行。这是因为实际存储中，“空行”并不存在，
-    // 所以对于可能为空的上层字段（例如integer），调用者希望跳过10行，
-    // 但实际上对于
+    // NOTE. Count the non-null rows among rows. This is because null rows do not exist in actual storage.
+    // So for upper-level fields that may be null (such as integer), the caller wants to skip 10 rows,
+    // but actually for
     uint64_t _count_none_nulls(uint64_t rows);
 
     bool _value_present;
     bool* _is_null;
-    uint32_t _column_id;             // column在schema内的id
-    uint32_t _column_unique_id;      // column的唯一id
-    BitFieldReader* _present_reader; // NULLabel的字段的NULL值
+    uint32_t _column_id;             // The id of the column in the schema
+    uint32_t _column_unique_id;      // the unique id of the column
+    BitFieldReader* _present_reader; // NULL bits of a nullable field
     std::vector<ColumnReader*> _sub_readers;
     OlapReaderStatistics* _stats = nullptr;
 };
@@ -426,7 +425,7 @@ private:
     RunLengthByteReader* _data_reader;
 };
 
-// IntColumnReader的包裹器, 实现了对ColumnReader的接口
+// A wrapper for IntegerColumnReader that implements the ColumnReader interface
 template <class T, bool is_sign>
 class IntegerColumnReaderWrapper : public ColumnReader {
 public:
@@ -519,14 +518,14 @@ public:
     virtual size_t get_buffer_size() { return sizeof(RunLengthIntegerReader); }
 
 private:
-    IntegerColumnReader _reader; // 被包裹的真实读取器
+    IntegerColumnReader _reader; // Wrapped real reader
     T* _values;
     bool _eof;
 };
 
-// OLAP Engine中有两类字符串，定长字符串和变长字符串，分别使用两个Wrapper
-// class 处理对这两种字符串的返回格式
-// FixLengthStringColumnReader 处理定长字符串，特点是不足长度的部分要补0
+// There are two types of strings in OLAP Engine, fixed-length strings and variable-length strings; two wrapper
+// classes handle the return format of these two kinds of strings respectively
+// FixLengthStringColumnReader handles fixed-length strings; the part shorter than the fixed length is padded with 0
 template <class ReaderClass>
 class FixLengthStringColumnReader : public ColumnReader {
 public:
@@ -594,7 +593,7 @@ private:
     uint32_t _string_length;
 };
 
-// VarStringColumnReader 处理变长长字符串，特点是在数据头部使用uint16表示长度
+// VarStringColumnReader handles variable length strings, characterized by using uint16 in the data header to indicate the length
 template <class ReaderClass>
 class VarStringColumnReader : public ColumnReader {
 public:
@@ -829,11 +828,11 @@ typedef FloatintPointColumnReader<float> FloatColumnReader;
 typedef FloatintPointColumnReader<double> DoubleColumnReader;
 typedef IntegerColumnReaderWrapper<int64_t, true> DiscreteDoubleColumnReader;
 
-// 使用3个字节存储的日期
-// 使用IntegerColumnReader，在返回数据时截断到3字节长度
+// Use 3 bytes to store the date
+// Use IntegerColumnReader, truncated to 3 bytes length when returning data
 typedef IntegerColumnReaderWrapper<uint24_t, false> DateColumnReader;
 
-// 内部使用LONG实现
+// Implemented internally with LONG
 typedef IntegerColumnReaderWrapper<uint64_t, false> DateTimeColumnReader;
 
 } // namespace doris
diff --git a/be/src/olap/rowset/column_writer.cpp b/be/src/olap/rowset/column_writer.cpp
index bdaf575..60a9017 100644
--- a/be/src/olap/rowset/column_writer.cpp
+++ b/be/src/olap/rowset/column_writer.cpp
@@ -309,11 +309,11 @@ uint64_t ColumnWriter::estimate_buffered_memory() {
     return result;
 }
 
-// 删去is_present_stream使用的positions:
-//  * OutStream使用2个
-//  * ByteRunLength 使用1个
-//  * BitRunLength 使用1个
-// 一共删去4个
+// Delete the positions used by is_present_stream:
+// * OutStream uses 2
+// * ByteRunLength uses 1
+// * BitRunLength uses 1
+// Delete 4 in total
 void ColumnWriter::_remove_is_present_positions() {
     for (uint32_t i = 0; i < _index.entry_size(); i++) {
         PositionEntryWriter* entry = _index.mutable_entry(i);
@@ -337,7 +337,7 @@ OLAPStatus ColumnWriter::finalize(ColumnDataHeaderMessage* header) {
 
     char* index_buf = NULL;
     // char* index_statistic_buf = NULL;
-    // 写index的pb
+    // Write index pb
     size_t pb_size = _index.output_size();
     index_buf = new (std::nothrow) char[pb_size];
     ColumnMessage* column = NULL;
@@ -377,8 +377,8 @@ OLAPStatus ColumnWriter::finalize(ColumnDataHeaderMessage* header) {
         }
     }
 
-    // 在Segment头中记录一份Schema信息
-    // 这样使得修改表的Schema后不影响对已存在的Segment中的数据读取
+    // Record a Schema information in the Segment header
+    // This makes it not affect the reading of the data in the existing segment after modifying the schema of the table
     column = header->add_column();
     column->set_name(_column.name());
     column->set_type(TabletColumn::get_string_by_field_type(_column.type()));
@@ -404,7 +404,7 @@ void ColumnWriter::record_position() {
     }
 }
 
-// 默认返回DIRECT, String类型的可能返回Dict
+// The default returns DIRECT, String type may return Dict
 void ColumnWriter::save_encoding(ColumnEncodingMessage* encoding) {
     encoding->set_kind(ColumnEncodingMessage::DIRECT);
 }
@@ -614,7 +614,7 @@ OLAPStatus VarStringColumnWriter::_finalize_dict_encoding() {
 
     uint32_t block_id = 0;
 
-    // 假设一共有n个id。（总记录数）
+    // Suppose there are n ids in total (the total number of records)
     for (uint32_t i = 0; i <= _string_id.size(); i++) {
         while (block_id < _block_row_count.size() - 1 && i == _block_row_count[block_id]) {
             _id_writer->get_position(index()->mutable_entry(block_id), false);
@@ -642,8 +642,8 @@ OLAPStatus VarStringColumnWriter::_finalize_direct_encoding() {
 #if 0
 
     for (uint32_t i = 0; i <= _string_id.size(); i++) {
-        // 与其他类型不同，string的record position会向_block_row_count写入条目
-        // 而其他类型在下一次调用create_index_row_entry之前是没有影响的。
+        //Unlike other types, the record position of string will write entries to _block_row_count
+        // Other types have no effect until the next call to create_index_row_entry.
         while (block_id < _block_row_count.size() - 1 &&
                 i == _block_row_count[block_id]) {
             _data_stream->get_position(index()->mutable_entry(block_id));
@@ -694,14 +694,14 @@ OLAPStatus VarStringColumnWriter::finalize(ColumnDataHeaderMessage* header) {
         }
     }
 
-    // 已经完成Index的补写, ColumnWriter::finalize会写入header
+    // The index's supplementary writing has been completed, ColumnWriter::finalize will write the header
     res = ColumnWriter::finalize(header);
     if (OLAP_SUCCESS != res) {
         OLAP_LOG_WARNING("fail to finalize ColumnWriter.");
         return res;
     }
 
-    // id_writer其实用到了data_stream, 重复flush一下没有关系
+    // id_writer actually uses data_stream, so flushing it again is harmless
     if (OLAP_SUCCESS != _length_writer->flush() || OLAP_SUCCESS != _id_writer->flush() ||
         OLAP_SUCCESS != _dict_stream->flush() || OLAP_SUCCESS != _data_stream->flush()) {
         OLAP_LOG_WARNING("fail to flush stream.");
@@ -725,9 +725,9 @@ void VarStringColumnWriter::save_encoding(ColumnEncodingMessage* encoding) {
     }
 }
 
-// 和其他的Writer不同, 只有到finalize的时候才真正向Stream写入数据,
-// 所以无法记录流的位置, 为此在记录每个block写入的数据条数, 在finalize时
-// 利用该信息向Index中追加stream的位置信息
+// Unlike other writers, data is only actually written to the stream at finalize time,
+// so the stream position cannot be recorded earlier. Instead, record the number of rows written in each block,
+// and at finalize time use this information to append the stream position information to the index
 void VarStringColumnWriter::record_position() {
     ColumnWriter::record_position();
     _block_row_count.push_back(_string_id.size());
diff --git a/be/src/olap/rowset/column_writer.h b/be/src/olap/rowset/column_writer.h
index a421acb..9ec0261 100644
--- a/be/src/olap/rowset/column_writer.h
+++ b/be/src/olap/rowset/column_writer.h
@@ -45,12 +45,12 @@ class RunLengthIntegerWriter;
 
 class ColumnWriter {
 public:
-    // 创建一个ColumnWriter, 创建后的对象生命期由调用者所有
-    // 即调用者负责调用delete析构ColumnWriter
+    // Create a ColumnWriter, the lifetime of the object after creation is owned by the caller
+    // That is, the caller is responsible for calling delete to destruct the ColumnWriter
     // Args:
-    //    column_id: 创建的列在columns中的位置
-    //    columns: 表的所有的列信息
-    //    stream_factory: 用于创建输出流的工厂对象, 该对象的生命期由调用者所有
+    //      column_id: the position of the created column in columns
+    //      columns: all column information of the table
+    //      stream_factory: The factory object used to create the output stream, the lifetime of the object is owned by the caller
     static ColumnWriter* create(uint32_t column_id, const TabletSchema& schema,
                                 OutStreamFactory* stream_factory, size_t num_rows_per_row_block,
                                 double bf_fpp);
@@ -62,12 +62,12 @@ public:
 
     virtual OLAPStatus write_batch(RowBlock* block, RowCursor* cursor) = 0;
 
-    // 将之前记录的block位置信息与当前的统计信息写入到一个新的索引项中
+    // Write the previously recorded block location information and current statistical information into a new index entry
     OLAPStatus create_row_index_entry();
-    // 估算当前缓存的内存大小, 不包括已经输出到OutStream的内存
+    // Estimate the current cache memory size, excluding the memory that has been output to OutStream
     virtual uint64_t estimate_buffered_memory();
     virtual OLAPStatus flush();
-    // 结束Segment, flush stream并更新header:
+    // End the segment, flush stream and update the header:
     //   * column_unique_id
     //   * column_type
     //   * column_encoding
@@ -92,7 +92,7 @@ protected:
     OutStreamFactory* stream_factory() { return _stream_factory; }
     PositionEntryWriter* index_entry() { return &_index_entry; }
     StreamIndexWriter* index() { return &_index; }
-    // 记录当前Stream的位置,用于生成索引项
+    // Record the position of the current Stream, which is used to generate index entries
     virtual void record_position();
 
 protected:
@@ -106,13 +106,13 @@ private:
 
     uint32_t _column_id;
     const TabletColumn& _column;
-    OutStreamFactory* _stream_factory;       // 该对象由外部调用者所有
-    std::vector<ColumnWriter*> _sub_writers; // 保存子列的writer
+    OutStreamFactory* _stream_factory;       // The object is owned by the external caller
+    std::vector<ColumnWriter*> _sub_writers; // Writer to save the sub-column
     PositionEntryWriter _index_entry;
     StreamIndexWriter _index;
-    BitFieldWriter* _is_present; // 对于允许NULL的列记录NULL Bits
+    BitFieldWriter* _is_present; //Record NULL Bits for columns that allow NULL
     OutStream* _is_present_stream;
-    OutStream* _index_stream; // 注意对象的所有权是_stream_factory
+    OutStream* _index_stream; // Note that the ownership of the object is _stream_factory
     bool _is_found_nulls;
     BloomFilter* _bf;
     BloomFilterIndexWriter _bf_index;
@@ -164,7 +164,7 @@ private:
     DISALLOW_COPY_AND_ASSIGN(ByteColumnWriter);
 };
 
-// 对于SHORT/INT/LONG类型的数据，统一使用int64作为存储的数据
+// For SHORT/INT/LONG type data, use int64 as the stored data uniformly
 class IntegerColumnWriter {
 public:
     IntegerColumnWriter(uint32_t column_id, uint32_t unique_column_id,
@@ -369,7 +369,7 @@ typedef DoubleColumnWriterBase<double> DoubleColumnWriter;
 typedef DoubleColumnWriterBase<float> FloatColumnWriter;
 typedef IntegerColumnWriterWrapper<int64_t, true> DiscreteDoubleColumnWriter;
 
-// VarString和String都作为变长类型使用StringColumnWriter写入
+// VarString and String are used as variable length types to write using StringColumnWriter
 class VarStringColumnWriter : public ColumnWriter {
 public:
     VarStringColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory,
@@ -406,11 +406,11 @@ public:
     virtual OLAPStatus flush() { return OLAP_SUCCESS; }
 
 protected:
-    // 不使用cursor直接写入一条数据
+    //Write a piece of data directly without using cursor
     OLAPStatus write(const char* str, uint32_t length);
 
 private:
-    // 可以在map中使用引用做key
+    // You can use references as keys in the map
     class DictKey {
     public:
         explicit DictKey(const std::string& str_ref) : _str_ref(str_ref) {}
@@ -442,7 +442,7 @@ private:
     DISALLOW_COPY_AND_ASSIGN(VarStringColumnWriter);
 };
 
-// 特例化一下VarStringColumnWriter, 在write的时候提取数据再写入
+// Specialize the VarStringColumnWriter, extract the data and write it when writing
 class FixLengthStringColumnWriter : public VarStringColumnWriter {
 public:
     FixLengthStringColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory,
@@ -484,10 +484,10 @@ private:
     DISALLOW_COPY_AND_ASSIGN(FixLengthStringColumnWriter);
 };
 
-// Date是三字节整数
+//Date is a three-byte integer
 typedef IntegerColumnWriterWrapper<uint24_t, false> DateColumnWriter;
 
-// DateTime是用int64实现的
+// DateTime is implemented with int64
 typedef IntegerColumnWriterWrapper<uint64_t, false> DateTimeColumnWriter;
 
 class DecimalColumnWriter : public ColumnWriter {
diff --git a/be/src/olap/rowset/run_length_integer_reader.cpp b/be/src/olap/rowset/run_length_integer_reader.cpp
index 52c5aeb..35952a9 100644
--- a/be/src/olap/rowset/run_length_integer_reader.cpp
+++ b/be/src/olap/rowset/run_length_integer_reader.cpp
@@ -219,7 +219,7 @@ OLAPStatus RunLengthIntegerReader::_read_patched_base_values(uint8_t first_byte)
     int64_t mask = (1L << ((bw * 8) - 1));
 
     // if MSB of base value is 1 then base is negative value else positive
-    // TODO(lijiao) : 为什么这里不用zig_zag来表示？
+    // TODO(lijiao): Why is zig_zag not used here?
     if ((base & mask) != 0) {
         base = base & ~mask;
         base = -base;
diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp
index 8314d0a..8058d67 100644
--- a/be/src/olap/rowset/segment_reader.cpp
+++ b/be/src/olap/rowset/segment_reader.cpp
@@ -49,7 +49,7 @@ SegmentReader::SegmentReader(const std::string file, SegmentGroup* segment_group
           _delete_status(delete_status),
           _eof(false),
           _end_block(-1),
-          // 确保第一次调用_move_to_next_row，会执行seek_to_block
+          // Make sure that the first call to _move_to_next_row will execute seek_to_block
           _block_count(0),
           _num_rows_in_block(0),
           _null_supported(false),
@@ -141,7 +141,7 @@ OLAPStatus SegmentReader::_load_segment_file() {
         return res;
     }
 
-    // 如果需要mmap，则进行映射
+    // If mmap is needed, then map
     if (_is_using_mmap) {
         _mmap_buffer = StorageByteBuffer::mmap(&_file_handler, 0, PROT_READ, MAP_PRIVATE);
 
@@ -202,7 +202,7 @@ OLAPStatus SegmentReader::init(bool is_using_cache) {
         OLAP_LOG_WARNING("fail to load segment file. ");
         return res;
     }
-    // 文件头
+    // File header
     res = _set_segment_info();
     if (OLAP_SUCCESS != res) {
         OLAP_LOG_WARNING("fail to set segment info. ");
@@ -344,12 +344,12 @@ void SegmentReader::_set_column_map() {
     size_t segment_column_size = _header_message().column_size();
     for (ColumnId segment_column_id = 0; segment_column_id < segment_column_size;
          ++segment_column_id) {
-        // 如果找得到，建立映射表
+        // If you can find it, create a mapping table
         ColumnId unique_column_id = _header_message().column(segment_column_id).unique_id();
         if (_unique_id_to_tablet_id_map.find(unique_column_id) !=
             _unique_id_to_tablet_id_map.end()) {
             _unique_id_to_segment_id_map[unique_column_id] = segment_column_id;
-            // encoding 应该和segment schema序一致。
+            // The encoding should be in the same order as the segment schema.
             _encodings_map[unique_column_id] = _header_message().column_encoding(segment_column_id);
         }
     }
@@ -595,9 +595,9 @@ OLAPStatus SegmentReader::_load_index(bool is_using_cache) {
                                       _header_message().num_rows_per_block()));
     for (int64_t stream_index = 0; stream_index < _header_message().stream_info_size();
          ++stream_index, stream_offset += stream_length) {
-        // 查找需要的index, 虽然有的index不需要读
-        // 取，但为了获取offset，还是要计算一遍
-        // 否则无法拿到正确的streamoffset
+        // Find the required index. Although some indexes do not need to be
+        // read, every stream must still be walked in order to compute the offset;
+        // otherwise, the correct stream offset cannot be obtained
         const StreamInfoMessage& message = _header_message().stream_info(stream_index);
         stream_length = message.length();
         ColumnId unique_column_id = message.column_unique_id();
@@ -624,12 +624,12 @@ OLAPStatus SegmentReader::_load_index(bool is_using_cache) {
         _cache_handle[cache_handle_index] = _lru_cache->lookup(key);
 
         if (NULL != _cache_handle[cache_handle_index]) {
-            // 1. 如果在lru中，取出buffer，并用来初始化index reader
+            // 1. If you are in lru, take out the buffer and use it to initialize the index reader
             is_using_cache = true;
             stream_buffer =
                     reinterpret_cast<char*>(_lru_cache->value(_cache_handle[cache_handle_index]));
         } else {
-            // 2. 如果不在lru中，需要创建index stream。
+            // 2. If it is not in lru, you need to create an index stream.
             stream_buffer = new (std::nothrow) char[stream_length];
             if (NULL == stream_buffer) {
                 OLAP_LOG_WARNING(
@@ -648,11 +648,11 @@ OLAPStatus SegmentReader::_load_index(bool is_using_cache) {
             }
 
             if (is_using_cache) {
-                // 将读出的索引放入lru中。
+                // Put the read index into lru.
                 _cache_handle[cache_handle_index] = _lru_cache->insert(
                         key, stream_buffer, stream_length, &_delete_cached_index_stream);
                 if (NULL == _cache_handle[cache_handle_index]) {
-                    // 这里可能是cache insert中的malloc失败了, 先返回成功
+                    // It may be that malloc in cache insert failed, first return success
                     LOG(FATAL) << "fail to insert lru cache.";
                 }
             }
@@ -675,7 +675,7 @@ OLAPStatus SegmentReader::_load_index(bool is_using_cache) {
 
             _indices[unique_column_id] = index_message;
 
-            // 每个index的entry数量应该一致, 也就是block的数量
+            // The number of entries for each index should be the same, that is, the number of blocks
             _block_count = index_message->entry_count();
         } else {
             BloomFilterIndexReader* bf_message = new (std::nothrow) BloomFilterIndexReader;
@@ -695,7 +695,7 @@ OLAPStatus SegmentReader::_load_index(bool is_using_cache) {
 
             _bloom_filters[unique_column_id] = bf_message;
 
-            // 每个index的entry数量应该一致, 也就是block的数量
+            // The number of entries for each index should be the same, that is, the number of blocks
             _block_count = bf_message->entry_count();
         }
 
@@ -721,7 +721,7 @@ OLAPStatus SegmentReader::_read_all_data_streams(size_t* buffer_size) {
     int64_t stream_offset = _header_length;
     uint64_t stream_length = 0;
 
-    // 每条流就一块整的
+    // Each stream is one piece
     for (int64_t stream_index = 0; stream_index < _header_message().stream_info_size();
          ++stream_index, stream_offset += stream_length) {
         const StreamInfoMessage& message = _header_message().stream_info(stream_index);
@@ -769,7 +769,7 @@ OLAPStatus SegmentReader::_create_reader(size_t* buffer_size) {
     _column_indices.resize(_segment_group->get_tablet_schema().num_columns(), nullptr);
     for (auto table_column_id : _used_columns) {
         ColumnId unique_column_id = _tablet_id_to_unique_id_map[table_column_id];
-        // 当前是不会出现table和segment的schema不一致的情况的
+        // Currently, there will be no inconsistencies in the schema of the table and the segment.
         std::unique_ptr<ColumnReader> reader(ColumnReader::create(
                 table_column_id, _segment_group->get_tablet_schema(), _unique_id_to_tablet_id_map,
                 _unique_id_to_segment_id_map, _encodings_map));
diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h
index fb462c6..0d3aef0 100644
--- a/be/src/olap/rowset/segment_reader.h
+++ b/be/src/olap/rowset/segment_reader.h
@@ -44,7 +44,7 @@
 
 namespace doris {
 
-// SegmentReader 用于读取一个Segment文件
+// SegmentReader is used to read a Segment file
 class SegmentReader {
 public:
     SegmentReader(const std::string file, SegmentGroup* segment_group, uint32_t segment_id,
@@ -56,10 +56,10 @@ public:
 
     ~SegmentReader();
 
-    // 初始化segmentreader：
-    // 1. 反序列化pb头，获取必要的信息；
-    // 2. 检查文件版本
-    // 3. 获取解压缩器
+    // Initialize segmentreader:
+    // 1. Deserialize the pb header to obtain the necessary information;
+    // 2. Check the file version
+    // 3. Get the decompressor
     // @return [description]
     OLAPStatus init(bool is_using_cache);
 
@@ -67,15 +67,15 @@ public:
     // TODO(zc)
     OLAPStatus prepare(const std::vector<uint32_t>& columns);
 
-    // 指定读取的第一个block和最后一个block，并初始化column reader
-    // seek_to_block支持被多次调用
+    // Specify the first block and the last block to read, and initialize the column reader
+    // seek_to_block supports being called multiple times
     // Inputs:
-    //   first_block: 需要读取的第一个block
-    //   last_block:  需要读取的最后一个block,如果last_block大于最大的block,
-    //                则读取所有的block
-    // 1. 按conditions过滤segment_group中的统计信息,  确定需要读取的block列表
-    // 2. 读取blocks, 构造InStream
-    // 3. 创建并初始化Readers
+    //   first_block: the first block that needs to be read
+    //   last_block:  the last block that needs to be read; if last_block is greater than the largest block,
+    //                all blocks are read
+    // 1. Filter the statistics in segment_group according to conditions and determine the list of blocks that need to be read
+    // 2. Read blocks, construct InStream
+    // 3. Create and initialize Readers
     // Outputs:
     // next_block_id:
     //      block with next_block_id would read if get_block called again.
@@ -92,22 +92,22 @@ public:
 
     bool eof() const { return _eof; }
 
-    // 返回当前segment中block的数目
+    // Returns the number of blocks in the current segment
     uint32_t block_count() const { return _block_count; }
 
-    // 返回当前segment中，每块的行数
+    // Returns the number of rows in each block in the current segment
     uint32_t num_rows_in_block() { return _num_rows_in_block; }
 
     bool is_using_mmap() { return _is_using_mmap; }
 
-    // 只允许在初始化之前选择，之后则无法更改
-    // 暂时没有动态切换的需求
+    // Only allowed to be selected before initialization, and cannot be changed afterwards
+    // There is no need for dynamic switching at the moment
     void set_is_using_mmap(bool is_using_mmap) { _is_using_mmap = is_using_mmap; }
 
 private:
     typedef std::vector<ColumnId>::iterator ColumnIdIterator;
 
-    // 用于表示一段要读取的数据范围
+    // Used to indicate a range of data to be read
     struct DiskRange {
         int64_t offset;
         int64_t end;
@@ -126,8 +126,8 @@ private:
 
     static void _delete_cached_index_stream(const CacheKey& key, void* value);
 
-    // 判断当前列是否需要读取
-    // 当_include_columns为空时，直接返回true
+    // Determine whether the current column needs to be read
+    // When _include_columns is empty, return true directly
     inline bool _is_column_included(ColumnId column_unique_id) {
         return _include_columns.count(column_unique_id) != 0;
     }
@@ -136,43 +136,43 @@ private:
         return _include_bf_columns.count(column_unique_id) != 0;
     }
 
-    // 加载文件和必要的文件信息
+    // Load files and necessary file information
     OLAPStatus _load_segment_file();
 
-    // 设置encoding map，创建列时使用
+    // Set the encoding map and use it when creating columns
     void _set_column_map();
 
-    // 从header中获取当前文件压缩格式，并生成解压器，可通过_decompressor调用
-    // @return 返回OLAP_SUCCESS代表版本检查通过
+    // Get the current file compression format from the header and generate a decompressor, which can be called via _decompressor
+    // @return OLAP_SUCCESS means the version check passed
     OLAPStatus _set_decompressor();
 
-    // 设置segment的相关信息，解压器，列，编码等
+    // Set segment related information, decompressor, column, encoding, etc.
     OLAPStatus _set_segment_info();
 
-    // 检查列存文件版本
-    // @return 返回OLAP_SUCCESS代表版本检查通过
+    // Check the columnar storage file version
+    // @return OLAP_SUCCESS means the version check passed
     OLAPStatus _check_file_version();
 
-    // 选出要读取的列
+    // Select the column to be read
     OLAPStatus _pick_columns();
 
-    // 根据条件选出要读取的范围，会用条件在first block和last block之间标记合适的区块
-    // NOTE. 注意范围是[first_block, last_block], 闭区间
-    // @param  first_block 起始块号
-    // @param  last_block  结束块号
+    // Select the range to be read according to the conditions, and use the conditions to mark the appropriate block between the first block and the last block
+    // NOTE. Note that the range is [first_block, last_block], closed interval
+    // @param  first_block : Starting block number
+    // @param  last_block  : End block number
     // @return
     OLAPStatus _pick_row_groups(uint32_t first_block, uint32_t last_block);
     OLAPStatus _pick_delete_row_groups(uint32_t first_block, uint32_t last_block);
 
-    // 加载索引，将需要的列的索引读入内存
+    // Load the index, read the index of the required column into memory
     OLAPStatus _load_index(bool is_using_cache);
 
-    // 读出所有列，完整的流，（这里只是创建stream，在orc file里因为没有mmap因
-    // 此意味着实际的数据读取， 而在这里并没有实际的读，只是圈出来需要的范围）
+    // Read all columns as complete streams. (This only creates the streams. An ORC file has no mmap,
+    // so there this step would mean actually reading the data; here nothing is actually read, only the required ranges are marked out.)
     OLAPStatus _read_all_data_streams(size_t* buffer_size);
 
-    // 过滤并读取，（和_read_all_data_streams一样，也没有实际的读取数据）
-    // 创建reader
+    // Filter and read, (like _read_all_data_streams, there is no actual read data)
+    // Create reader
     OLAPStatus _create_reader(size_t* buffer_size);
 
     // we implement seek to block in two phase. first, we just only move _next_block_id
@@ -184,12 +184,12 @@ private:
     // because some columns may not be read
     OLAPStatus _seek_to_block_directly(int64_t block_id, const std::vector<uint32_t>& cids);
 
-    // 跳转到某个row entry
+    // Jump to a row entry
     OLAPStatus _seek_to_row_entry(int64_t block_id);
 
     OLAPStatus _reset_readers();
 
-    // 获取当前的table级schema。
+    // Get the current table-level schema.
     inline const TabletSchema& tablet_schema() { return _segment_group->get_tablet_schema(); }
 
     inline const ColumnDataHeaderMessage& _header_message() { return _file_header->message(); }
@@ -240,13 +240,13 @@ private:
     static const int32_t RUN_LENGTH_BYTE_POSITIONS = BYTE_STREAM_POSITIONS + 1;
     static const int32_t BITFIELD_POSITIONS = RUN_LENGTH_BYTE_POSITIONS + 1;
     static const int32_t RUN_LENGTH_INT_POSITIONS = BYTE_STREAM_POSITIONS + 1;
-    // 这个值的含义是，8 = 最大的int类型长度，int reader 一次读取最多会搞出来12个字符（MAX SCOPE）.
-    // 假设完全没压缩，那么就会有这么多个字节，2应该是控制字符？
-    // 那么最多读这么多，就一定足够把下一个字段解出来。
+    // The meaning of this value is that 8 = the largest int type length, and an int reader can read up to 12 characters at a time (MAX SCOPE).
+    // Assuming no compression at all, then there will be so many bytes, 2 should be the control character?
+    // So reading at most this many bytes is guaranteed to be enough to decode the next field.
     static const int32_t WORST_UNCOMPRESSED_SLOP = 2 + 8 * 512;
     static const uint32_t CURRENT_COLUMN_DATA_VERSION = 1;
 
-    std::string _file_name; // 文件名
+    std::string _file_name; // File name
     SegmentGroup* _segment_group;
     uint32_t _segment_id;
     // columns that can be used by client. when client seek to range's start or end,
@@ -258,44 +258,44 @@ private:
     //  In this situation, _used_columns contains (k1, k2, v1)
     std::vector<uint32_t> _used_columns;
     UniqueIdSet _load_bf_columns;
-    const Conditions* _conditions;    // 列过滤条件
-    doris::FileHandler _file_handler; // 文件handler
+    const Conditions* _conditions;    // Column filter
+    doris::FileHandler _file_handler; // File handler
 
     const DeleteHandler* _delete_handler = nullptr;
     DelCondSatisfied _delete_status;
 
-    bool _eof; // eof标志
+    bool _eof; // EOF flag
 
     // If this field is true, client must to call seek_to_block before
     // calling get_block.
     bool _need_to_seek_block = true;
 
-    int64_t _end_block;            // 本次读取的结束块
-    int64_t _current_block_id = 0; // 当前读取到的块
+    int64_t _end_block;            // The end block read this time
+    int64_t _current_block_id = 0; // Block currently read
 
     // this is set by _seek_to_block, when get_block is called, first
     // seek to this block_id, then read block.
     int64_t _next_block_id = 0;
-    int64_t _block_count; // 每一列中，index entry的数目应该相等。
+    int64_t _block_count; // In each column, the number of index entries should be equal.
 
     uint64_t _num_rows_in_block;
     bool _null_supported;
-    uint64_t _header_length; // Header(FixHeader+PB)大小，读数据时需要偏移
+    uint64_t _header_length; // Header(FixHeader+PB) size, need to offset when reading data
 
-    std::vector<ColumnReader*> _column_readers;      // 实际的数据读取器
-    std::vector<StreamIndexReader*> _column_indices; // 保存column的index
+    std::vector<ColumnReader*> _column_readers;      // Actual data reader
+    std::vector<StreamIndexReader*> _column_indices; // Save the index of the column
 
-    UniqueIdSet _include_columns; // 用于判断该列是不是被包含
+    UniqueIdSet _include_columns; // Used to determine whether the column is included
     UniqueIdSet _include_bf_columns;
-    UniqueIdToColumnIdMap _tablet_id_to_unique_id_map;  // tablet id到unique id的映射
-    UniqueIdToColumnIdMap _unique_id_to_tablet_id_map;  // unique id到tablet id的映射
-    UniqueIdToColumnIdMap _unique_id_to_segment_id_map; // unique id到segment id的映射
+    UniqueIdToColumnIdMap _tablet_id_to_unique_id_map;  // The mapping from tablet id to unique id
+    UniqueIdToColumnIdMap _unique_id_to_tablet_id_map;  // Mapping from unique id to tablet id
+    UniqueIdToColumnIdMap _unique_id_to_segment_id_map; // Mapping from unique id to segment id
 
     std::map<ColumnId, StreamIndexReader*> _indices;
-    std::map<StreamName, ReadOnlyFileStream*> _streams; //需要读取的流
-    UniqueIdEncodingMap _encodings_map;                 // 保存encoding
+    std::map<StreamName, ReadOnlyFileStream*> _streams; // Streams that need to be read
+    UniqueIdEncodingMap _encodings_map;                 // Save encoding
     std::map<ColumnId, BloomFilterIndexReader*> _bloom_filters;
-    Decompressor _decompressor; //根据压缩格式，设置的解压器
+    Decompressor _decompressor; // According to the compression format, set the decompressor
     StorageByteBuffer* _mmap_buffer;
 
     /*
@@ -309,8 +309,8 @@ private:
     */
     uint8_t* _include_blocks;
     uint32_t _remain_block;
-    bool _need_block_filter; //与include blocks组合使用，如果全不中，就不再读
-    bool _is_using_mmap;     // 这个标记为true时，使用mmap来读取文件
+    bool _need_block_filter; // Used in combination with include blocks; if no block is hit, stop reading
+    bool _is_using_mmap;     // When this flag is true, use mmap to read the file
     bool _is_data_loaded;
     size_t _buffer_size;
 
@@ -322,7 +322,7 @@ private:
 
     StorageByteBuffer* _shared_buffer;
     Cache* _lru_cache;
-    RuntimeState* _runtime_state; // 用于统计内存消耗等运行时信息
+    RuntimeState* _runtime_state; // Used to count runtime information such as memory consumption
     OlapReaderStatistics* _stats;
 
     // Set when seek_to_block is called, valid until next seek_to_block is called.
diff --git a/be/src/olap/serialize.h b/be/src/olap/serialize.h
index ef4a72c..be8fc51 100644
--- a/be/src/olap/serialize.h
+++ b/be/src/olap/serialize.h
@@ -28,33 +28,33 @@ class ReadOnlyFileStream;
 
 namespace ser {
 
-// ZigZag变换: 将符号位放到最低位, 且在负数时翻转其他各位
+// ZigZag transformation: put the sign bit to the lowest bit, and flip the other bits when it is negative
 inline int64_t zig_zag_encode(int64_t value) {
     return (value << 1) ^ (value >> 63);
 }
 
-// ZigZag解码
+// ZigZag decoding
 inline int64_t zig_zag_decode(int64_t value) {
     return (((uint64_t)value) >> 1) ^ -(value & 1);
 }
 
-// 以变长编码写入unsigned数据, 变长编码使用最高位表示是否终止:
-//     - 1 后续还有数据
-//     - 0 这是最后一个字节的数据
-// 所谓unsigned数据, 指数据不容易出现符号位为1, 后续连续为0的情况; 或者从符号位
-// 起连续出现1的情况. 而signed数据表示负数时, 容易出现这种情况, 在这种情况下,
-// 无法有效利用变长编码减少码长, 为此请使用write_var_signed.
+// Write unsigned data using variable-length encoding. The highest bit of each byte indicates whether the encoding terminates:
+//     - 1: more data follows
+//     - 0: this is the last byte of the data
+// "Unsigned" data here means data that is unlikely to have a sign bit of 1 followed by consecutive 0s, or
+// consecutive 1s starting from the sign bit. Signed data representing negative numbers is prone to this; in that case
+// variable-length encoding cannot effectively reduce the encoded length, so use write_var_signed instead.
 OLAPStatus write_var_unsigned(OutStream* stream, int64_t value);
 
-// 以变长编码写入signed数据, 为了避免负数高位连续的1的问题, 将数据进行ZigZag变换
+// Write signed data with variable length encoding, in order to avoid the problem of continuous 1s in the high bits of negative numbers, the data is ZigZag transformed
 inline OLAPStatus write_var_signed(OutStream* stream, int64_t value) {
     return write_var_unsigned(stream, zig_zag_encode(value));
 }
 
-// 读入write_var_unsigned编码的数据
+// Read in write_var_unsigned encoded data
 OLAPStatus read_var_unsigned(ReadOnlyFileStream* stream, int64_t* value);
 
-// 读入write_var_signed编码的数据
+// Read in write_var_signed encoded data
 inline OLAPStatus read_var_signed(ReadOnlyFileStream* stream, int64_t* value) {
     OLAPStatus res = read_var_unsigned(stream, value);
 
@@ -65,10 +65,11 @@ inline OLAPStatus read_var_signed(ReadOnlyFileStream* stream, int64_t* value) {
     return res;
 }
 
-// 在RunLengthIntegerWriter中的bit_width都是5bit编码, 这样最多支持2^5=32种比特位
-// 长. 然而, 需要表示最多1~64位, 共64种比特位长, 于是在64种比特位长中取32种. 对
-// 其他剩余32个不在这32种的比特长度向上对齐到最接近的一个比特位长.
-// FixedBitSize给出了32种比特位长
+// The bit_width in RunLengthIntegerWriter is always encoded in 5 bits,
+// so at most 2^5=32 distinct bit lengths are supported. However, 1~64 bits must be representable,
+// i.e. 64 bit lengths in total, so 32 of those 64 bit lengths are chosen.
+// The remaining 32 bit lengths that are not among these 32 are aligned up to the nearest supported bit length.
+// FixedBitSize gives the 32 bit lengths
 enum FixedBitSize {
     ONE = 0,
     TWO,
@@ -121,7 +122,7 @@ inline void compute_hists(int64_t* data, uint16_t count, uint16_t hists[65]) {
     }
 }
 
-// 返回大于等于n且最接近n的FixedBiteSize
+// Returns the FixedBitSize greater than or equal to n and closest to n
 inline uint32_t get_closet_fixed_bits(uint32_t n) {
     static uint8_t bits_map[65] = {
             1,                              // 0
@@ -149,29 +150,29 @@ inline uint32_t percentile_bits_with_hist(uint16_t hists[65], uint16_t count, do
     return 0;
 }
 
-// 首先计算value的比特位长(1所在的最高位), 再使用get_closet_fixed_bits
-// 返回最接近的FixedBiteSize
+// First calculate the bit length of value (the position of the highest 1 bit), and then use get_closet_fixed_bits
+// to return the closest FixedBitSize
 uint32_t find_closet_num_bits(int64_t value);
 
 // Read n bytes in big endian order and convert to long
 OLAPStatus bytes_to_long_be(ReadOnlyFileStream* stream, int32_t n, int64_t* value);
 
-// 将位长编码为32个定长比特位之一, 返回值为0~31之间
+// Encode the bit length as one of the 32 fixed bit lengths; the return value is between 0 and 31
 uint32_t encode_bit_width(uint32_t n);
 
-// 解码encode_bit_width编码的结果
+// Decode the result of encode_bit_width encoding
 uint32_t decode_bit_width(uint32_t n);
 
-// 将data中的数据按比特位长排序, 返回给定比例p下, 最大位长.
-// 例如: p == 1.0, 表示所有的数据的最大位长
-//       p == 0.9, 表示比特位最短的90%的数据的最大位长
-//       p == 0.5, 表示比特位最短的50%的数据的最大位长
+// Sort the values in data by bit length, and return the maximum bit length under a given ratio p.
+// For example: p == 1.0, the maximum bit length of all the data
+//              p == 0.9, the maximum bit length of the 90% of the data with the shortest bit lengths
+//              p == 0.5, the maximum bit length of the 50% of the data with the shortest bit lengths
 uint32_t percentile_bits(int64_t* data, uint16_t count, double p);
 
-// 以紧致方式向output输出一组整数
+// Output a set of integers to output in a compact manner
 OLAPStatus write_ints(OutStream* output, int64_t* data, uint32_t count, uint32_t bit_width);
 
-// 读取write_ints输出的数据
+// Read the data output by write_ints
 OLAPStatus read_ints(ReadOnlyFileStream* input, int64_t* data, uint32_t count, uint32_t bit_width);
 
 // Do not want to use Guava LongMath.checkedSubtract() here as it will throw
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 55d56a9..308900d 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -242,10 +242,10 @@ private:
 
     void _start_clean_cache();
 
-    // 磁盘状态监测。监测unused_flag路劲新的对应root_path unused标识位，
-    // 当检测到有unused标识时，从内存中删除对应表信息，磁盘数据不动。
-    // 当磁盘状态为不可用，但未检测到unused标识时，需要从root_path上
-    // 重新加载数据。
+    // Disk status monitoring. Monitors the unused_flag path and the unused flag of the corresponding root_path.
+    // When the unused flag is detected, the corresponding table information is deleted from memory, and the disk data is left untouched.
+    // When the disk status is unavailable but the unused flag is not detected, the data needs to be
+    // reloaded from root_path.
     void _start_disk_stat_monitor();
 
     void _compaction_tasks_producer_callback();
@@ -265,7 +265,7 @@ private:
     struct CompactionCandidate {
         CompactionCandidate(uint32_t nicumulative_compaction_, int64_t tablet_id_, uint32_t index_)
                 : nice(nicumulative_compaction_), tablet_id(tablet_id_), disk_index(index_) {}
-        uint32_t nice; // 优先度
+        uint32_t nice; // priority
         int64_t tablet_id;
         uint32_t disk_index = -1;
     };
diff --git a/be/src/olap/stream_index_common.h b/be/src/olap/stream_index_common.h
index baf91b6..dc4ab19 100644
--- a/be/src/olap/stream_index_common.h
+++ b/be/src/olap/stream_index_common.h
@@ -26,12 +26,12 @@
 
 namespace doris {
 
-// 描述streamindex的格式
+// Describe the format of streamindex
 struct StreamIndexHeader {
-    uint64_t block_count;      // 本index中block的个数
-    uint32_t position_format;  // position的个数，每个长度为sizeof(uint32_t)
-    uint32_t statistic_format; // 统计信息格式，实际上就是OLAP_FIELD_TYPE_XXX
-    // 为OLAP_FIELD_TYPE_NONE时, 表示无索引
+    uint64_t block_count;      // The number of blocks in this index
+    uint32_t position_format;  // The number of positions, each length is sizeof(uint32_t)
+    uint32_t statistic_format; // The statistical information format is actually OLAP_FIELD_TYPE_XXX
+    // When it is OLAP_FIELD_TYPE_NONE, it means no index
     StreamIndexHeader()
             : block_count(0), position_format(0), statistic_format(OLAP_FIELD_TYPE_NONE) {}
 } __attribute__((packed));
@@ -39,16 +39,16 @@ struct StreamIndexHeader {
 // TODO: string type(char, varchar) has no columnar statistics at present.
 // when you want to add columnar statistics for string type,
 // don't forget to convert storage layout between disk and memory.
-// 处理列的统计信息，读写一体，也可以分开。
+// Processing column statistics, read and write in one, can also be separated.
 class ColumnStatistics {
 public:
     ColumnStatistics();
     ~ColumnStatistics();
 
-    // 初始化，需要给FieldType，用来初始化最大最小值
-    // 使用前必须首先初始化，否则无效
+    // Initialization, FieldType needs to be used to initialize the maximum and minimum values
+    // It must be initialized before use, otherwise it will be invalid
     OLAPStatus init(const FieldType& type, bool null_supported);
-    // 只是reset最大和最小值，将最小值设置为MAX，将最大值设置为MIN。
+    // Just reset the maximum and minimum values, set the minimum value to MAX, and the maximum value to MIN.
     void reset();
 
     template <typename CellType>
@@ -64,17 +64,17 @@ public:
         }
     }
 
-    // 合并，将另一个统计信息和入当前统计中
+    // Combine, merge another statistic information into the current statistic
     void merge(ColumnStatistics* other);
-    // 返回最大最小值“输出时”占用的内存，而“不是?
-    // ??当前结构占用的内存大小
+    // Returns the memory occupied by the maximum and minimum values "when output",
+    // rather than the size of the memory occupied by the current structure
     size_t size() const;
-    // 将最大最小值attach到给定的buffer上
+    // Attach the maximum and minimum values to the given buffer
     void attach(char* buffer);
-    // 将最大最小值输出到buffer中
+    // Output the maximum and minimum values to the buffer
     OLAPStatus write_to_buffer(char* buffer, size_t size);
 
-    // 属性
+    // Attributes
     const WrapperField* minimum() const { return _minimum; }
     const WrapperField* maximum() const { return _maximum; }
     std::pair<WrapperField*, WrapperField*> pair() const {
@@ -85,8 +85,9 @@ public:
 protected:
     WrapperField* _minimum;
     WrapperField* _maximum;
-    // 由于暂时不支持string的统计信息，为了方便直接定义长度
-    // 也可以每次都分配
+    // As the statistical information of string is not supported for the time being,
+    // the length is directly defined for convenience
+    // Can also be assigned every time
     bool _ignored;
     bool _null_supported;
 };
diff --git a/be/test/exec/csv_scan_node_test.cpp b/be/test/exec/csv_scan_node_test.cpp
index 000b135..dd72730 100644
--- a/be/test/exec/csv_scan_node_test.cpp
+++ b/be/test/exec/csv_scan_node_test.cpp
@@ -418,10 +418,10 @@ TEST_F(CsvScanNodeTest, wrong_fix_len_string_format_test) {
     ASSERT_TRUE(!scan_node.close(_state).ok());
 }
 
-// 待补充测试case
-// 1. 字符串导入
-// 2. 不指定有默认值的列
-// 3. 文件中有但表中没有的列，导入命令中跳过该列
+// Test cases to be added
+// 1. String import
+// 2. Do not specify columns with default values
+// 3. If there is a column in the file but not in the table, the column is skipped in the import command
 // 4. max_filter_ratio
 
 } // end namespace doris
diff --git a/be/test/exec/parquet_scanner_test.cpp b/be/test/exec/parquet_scanner_test.cpp
index e6b6114..e299f71 100644
--- a/be/test/exec/parquet_scanner_test.cpp
+++ b/be/test/exec/parquet_scanner_test.cpp
@@ -95,7 +95,8 @@ int ParquetScannerTest::create_src_tuple(TDescriptorTable& t_desc_table, int nex
         }
         slot_desc.slotType = type;
         slot_desc.columnPos = i;
-        slot_desc.byteOffset = i * 16 + 8; // 跳过前8个字节 这8个字节用于表示字段是否为null值
+        // Skip the first 8 bytes; these 8 bytes are used to indicate whether the field is a null value
+        slot_desc.byteOffset = i * 16 + 8; 
         slot_desc.nullIndicatorByte = i / 8;
         slot_desc.nullIndicatorBit = i % 8;
         slot_desc.colName = columnNames[i];
@@ -109,7 +110,8 @@ int ParquetScannerTest::create_src_tuple(TDescriptorTable& t_desc_table, int nex
         // TTupleDescriptor source
         TTupleDescriptor t_tuple_desc;
         t_tuple_desc.id = TUPLE_ID_SRC;
-        t_tuple_desc.byteSize = COLUMN_NUMBERS * 16 + 8; //此处8字节为了处理null值
+        // The 8 bytes here are for handling null values
+        t_tuple_desc.byteSize = COLUMN_NUMBERS * 16 + 8; 
         t_tuple_desc.numNullBytes = 0;
         t_tuple_desc.tableId = 0;
         t_tuple_desc.__isset.tableId = true;
@@ -119,7 +121,7 @@ int ParquetScannerTest::create_src_tuple(TDescriptorTable& t_desc_table, int nex
 }
 
 int ParquetScannerTest::create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id) {
-    int32_t byteOffset = 8; // 跳过前8个字节 这8个字节用于表示字段是否为null值
+    int32_t byteOffset = 8; // Skip the first 8 bytes These 8 bytes are used to indicate whether the field is a null value
     {                       //log_version
         TSlotDescriptor slot_desc;
 
@@ -237,7 +239,7 @@ int ParquetScannerTest::create_dst_tuple(TDescriptorTable& t_desc_table, int nex
         // TTupleDescriptor dest
         TTupleDescriptor t_tuple_desc;
         t_tuple_desc.id = TUPLE_ID_DST;
-        t_tuple_desc.byteSize = byteOffset + 8; //此处8字节为了处理null值
+        t_tuple_desc.byteSize = byteOffset + 8; //Here 8 bytes in order to handle null values
         t_tuple_desc.numNullBytes = 0;
         t_tuple_desc.tableId = 0;
         t_tuple_desc.__isset.tableId = true;

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org