You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by qi...@apache.org on 2022/04/08 07:42:07 UTC

[iotdb] branch master updated: [IOTDB-2238] Library-UDF Data Quality Functions (#4691)

This is an automated email from the ASF dual-hosted git repository.

qiaojialin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new e23879045d [IOTDB-2238] Library-UDF Data Quality Functions (#4691)
e23879045d is described below

commit e23879045d8379ab3e99fa960012dbf58fef7771
Author: Pengyu Chen <48...@users.noreply.github.com>
AuthorDate: Fri Apr 8 15:42:01 2022 +0800

    [IOTDB-2238] Library-UDF Data Quality Functions (#4691)
---
 docs/UserGuide/Library-UDF/Data-Quality.md         | 523 +++++++++++++++++++++
 docs/UserGuide/Library-UDF/Get-Started.md          |  61 +++
 docs/zh/UserGuide/Library-UDF/Data-Quality.md      | 510 ++++++++++++++++++++
 docs/zh/UserGuide/Library-UDF/Get-Started.md       |  58 +++
 .../iotdb/library/dquality/UDTFCompleteness.java   |  76 +++
 .../iotdb/library/dquality/UDTFConsistency.java    |  74 +++
 .../iotdb/library/dquality/UDTFTimeliness.java     |  73 +++
 .../iotdb/library/dquality/UDTFValidity.java       |  73 +++
 .../library/dquality/util/TimeSeriesQuality.java   | 243 ++++++++++
 .../iotdb/library/dquality/DQualityTests.java      | 385 +++++++++++++++
 site/src/main/.vuepress/config.js                  |  16 +
 11 files changed, 2092 insertions(+)

diff --git a/docs/UserGuide/Library-UDF/Data-Quality.md b/docs/UserGuide/Library-UDF/Data-Quality.md
new file mode 100644
index 0000000000..bbf12819dd
--- /dev/null
+++ b/docs/UserGuide/Library-UDF/Data-Quality.md
@@ -0,0 +1,523 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+
+# Data Quality
+
+## Completeness
+
+### Usage
+This function is used to calculate the completeness of a time series. The input series is divided into several continuous and non-overlapping windows. The timestamp of the first data point and the completeness of each window will be output.
+
+**Name:** COMPLETENESS
+
+**Input Series:** Only support a single input series. The type is INT32 / INT64 / FLOAT / DOUBLE.
+
+**Parameters:**
+
++ `window`: The size of each window. It is a positive integer or a positive number with a unit. The former is the number of data points in each window. The last window may contain fewer data points. The latter is the time span of the window. The unit is 'ms' for millisecond, 's' for second, 'm' for minute, 'h' for hour and 'd' for day. By default, all input data belongs to the same window.
++ `downtime`: Whether the downtime exception is considered in the calculation of completeness. It is 'true' or 'false' (default). When considering the downtime exception, long-term missing data will be considered as downtime exception without any influence on completeness.
+
+**Output Series:** Output a single series. The type is DOUBLE. The range of each value is [0,1].
+
+**Note:** The calculation is performed only when the number of data points in the window exceeds 10. Otherwise, the window is ignored and nothing is output.
+
+
+
+### Examples
+
+#### Default Parameters
+
+With default parameters, this function will regard all input data as the same window.
+
+Input series:
+
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
++-----------------------------+---------------+
+```
+
+SQL for query:
+
+```sql
+select completeness(s1) from root.test.d1 where time <= 2020-01-01 00:00:30
+```
+
+Output series:
+
+```
++-----------------------------+-----------------------------+
+|                         Time|completeness(root.test.d1.s1)|
++-----------------------------+-----------------------------+
+|2020-01-01T00:00:02.000+08:00|                        0.875|
++-----------------------------+-----------------------------+
+```
+
+#### Specific Window Size
+
+When the window size is given, this function divides the input data into multiple windows.
+
+Input series:
+
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
+|2020-01-01T00:00:32.000+08:00|          130.0|
+|2020-01-01T00:00:34.000+08:00|          132.0|
+|2020-01-01T00:00:36.000+08:00|          134.0|
+|2020-01-01T00:00:38.000+08:00|          136.0|
+|2020-01-01T00:00:40.000+08:00|          138.0|
+|2020-01-01T00:00:42.000+08:00|          140.0|
+|2020-01-01T00:00:44.000+08:00|          142.0|
+|2020-01-01T00:00:46.000+08:00|          144.0|
+|2020-01-01T00:00:48.000+08:00|          146.0|
+|2020-01-01T00:00:50.000+08:00|          148.0|
+|2020-01-01T00:00:52.000+08:00|          150.0|
+|2020-01-01T00:00:54.000+08:00|          152.0|
+|2020-01-01T00:00:56.000+08:00|          154.0|
+|2020-01-01T00:00:58.000+08:00|          156.0|
+|2020-01-01T00:01:00.000+08:00|          158.0|
++-----------------------------+---------------+
+```
+
+SQL for query:
+
+```sql
+select completeness(s1,"window"="15") from root.test.d1 where time <= 2020-01-01 00:01:00
+```
+
+Output series:
+
+```
++-----------------------------+--------------------------------------------+
+|                         Time|completeness(root.test.d1.s1, "window"="15")|
++-----------------------------+--------------------------------------------+
+|2020-01-01T00:00:02.000+08:00|                                       0.875|
+|2020-01-01T00:00:32.000+08:00|                                         1.0|
++-----------------------------+--------------------------------------------+
+```
+
+## Consistency
+
+### Usage
+This function is used to calculate the consistency of a time series. The input series is divided into several continuous and non-overlapping windows. The timestamp of the first data point and the consistency of each window will be output.
+
+**Name:** CONSISTENCY
+
+**Input Series:** Only support a single input series. The type is INT32 / INT64 / FLOAT / DOUBLE.
+
+**Parameters:**
+
++ `window`: The size of each window. It is a positive integer or a positive number with a unit. The former is the number of data points in each window. The last window may contain fewer data points. The latter is the time span of the window. The unit is 'ms' for millisecond, 's' for second, 'm' for minute, 'h' for hour and 'd' for day. By default, all input data belongs to the same window.
+
+**Output Series:** Output a single series. The type is DOUBLE. The range of each value is [0,1].
+
+**Note:** The calculation is performed only when the number of data points in the window exceeds 10. Otherwise, the window is ignored and nothing is output.
+
+
+
+### Examples
+
+#### Default Parameters
+
+With default parameters, this function will regard all input data as the same window.
+
+Input series:
+
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
++-----------------------------+---------------+
+```
+
+SQL for query:
+
+```sql
+select consistency(s1) from root.test.d1 where time <= 2020-01-01 00:00:30
+```
+
+Output series:
+
+```
++-----------------------------+----------------------------+
+|                         Time|consistency(root.test.d1.s1)|
++-----------------------------+----------------------------+
+|2020-01-01T00:00:02.000+08:00|          0.9333333333333333|
++-----------------------------+----------------------------+
+```
+
+#### Specific Window Size
+
+When the window size is given, this function divides the input data into multiple windows.
+
+Input series:
+
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
+|2020-01-01T00:00:32.000+08:00|          130.0|
+|2020-01-01T00:00:34.000+08:00|          132.0|
+|2020-01-01T00:00:36.000+08:00|          134.0|
+|2020-01-01T00:00:38.000+08:00|          136.0|
+|2020-01-01T00:00:40.000+08:00|          138.0|
+|2020-01-01T00:00:42.000+08:00|          140.0|
+|2020-01-01T00:00:44.000+08:00|          142.0|
+|2020-01-01T00:00:46.000+08:00|          144.0|
+|2020-01-01T00:00:48.000+08:00|          146.0|
+|2020-01-01T00:00:50.000+08:00|          148.0|
+|2020-01-01T00:00:52.000+08:00|          150.0|
+|2020-01-01T00:00:54.000+08:00|          152.0|
+|2020-01-01T00:00:56.000+08:00|          154.0|
+|2020-01-01T00:00:58.000+08:00|          156.0|
+|2020-01-01T00:01:00.000+08:00|          158.0|
++-----------------------------+---------------+
+```
+
+SQL for query:
+
+```sql
+select consistency(s1,"window"="15") from root.test.d1 where time <= 2020-01-01 00:01:00
+```
+
+Output series:
+
+```
++-----------------------------+-------------------------------------------+
+|                         Time|consistency(root.test.d1.s1, "window"="15")|
++-----------------------------+-------------------------------------------+
+|2020-01-01T00:00:02.000+08:00|                         0.9333333333333333|
+|2020-01-01T00:00:32.000+08:00|                                        1.0|
++-----------------------------+-------------------------------------------+
+```
+
+## Timeliness
+
+### Usage
+This function is used to calculate the timeliness of a time series. The input series is divided into several continuous and non-overlapping windows. The timestamp of the first data point and the timeliness of each window will be output.
+
+**Name:** TIMELINESS
+
+**Input Series:** Only support a single input series. The type is INT32 / INT64 / FLOAT / DOUBLE.
+
+**Parameters:**
+
++ `window`: The size of each window. It is a positive integer or a positive number with a unit. The former is the number of data points in each window. The last window may contain fewer data points. The latter is the time span of the window. The unit is 'ms' for millisecond, 's' for second, 'm' for minute, 'h' for hour and 'd' for day. By default, all input data belongs to the same window.
+
+**Output Series:** Output a single series. The type is DOUBLE. The range of each value is [0,1].
+
+**Note:** The calculation is performed only when the number of data points in the window exceeds 10. Otherwise, the window is ignored and nothing is output.
+
+
+
+### Examples
+
+#### Default Parameters
+
+With default parameters, this function will regard all input data as the same window.
+
+Input series:
+
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
++-----------------------------+---------------+
+```
+
+SQL for query:
+
+```sql
+select timeliness(s1) from root.test.d1 where time <= 2020-01-01 00:00:30
+```
+
+Output series:
+
+```
++-----------------------------+---------------------------+
+|                         Time|timeliness(root.test.d1.s1)|
++-----------------------------+---------------------------+
+|2020-01-01T00:00:02.000+08:00|         0.9333333333333333|
++-----------------------------+---------------------------+
+```
+
+#### Specific Window Size
+
+When the window size is given, this function divides the input data into multiple windows.
+
+Input series:
+
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
+|2020-01-01T00:00:32.000+08:00|          130.0|
+|2020-01-01T00:00:34.000+08:00|          132.0|
+|2020-01-01T00:00:36.000+08:00|          134.0|
+|2020-01-01T00:00:38.000+08:00|          136.0|
+|2020-01-01T00:00:40.000+08:00|          138.0|
+|2020-01-01T00:00:42.000+08:00|          140.0|
+|2020-01-01T00:00:44.000+08:00|          142.0|
+|2020-01-01T00:00:46.000+08:00|          144.0|
+|2020-01-01T00:00:48.000+08:00|          146.0|
+|2020-01-01T00:00:50.000+08:00|          148.0|
+|2020-01-01T00:00:52.000+08:00|          150.0|
+|2020-01-01T00:00:54.000+08:00|          152.0|
+|2020-01-01T00:00:56.000+08:00|          154.0|
+|2020-01-01T00:00:58.000+08:00|          156.0|
+|2020-01-01T00:01:00.000+08:00|          158.0|
++-----------------------------+---------------+
+```
+
+SQL for query:
+
+```sql
+select timeliness(s1,"window"="15") from root.test.d1 where time <= 2020-01-01 00:01:00
+```
+
+Output series:
+
+```
++-----------------------------+------------------------------------------+
+|                         Time|timeliness(root.test.d1.s1, "window"="15")|
++-----------------------------+------------------------------------------+
+|2020-01-01T00:00:02.000+08:00|                        0.9333333333333333|
+|2020-01-01T00:00:32.000+08:00|                                       1.0|
++-----------------------------+------------------------------------------+
+```
+
+## Validity
+
+### Usage
+This function is used to calculate the validity of a time series. The input series is divided into several continuous and non-overlapping windows. The timestamp of the first data point and the validity of each window will be output.
+
+**Name:** VALIDITY
+
+**Input Series:** Only support a single input series. The type is INT32 / INT64 / FLOAT / DOUBLE.
+
+**Parameters:**
+
++ `window`: The size of each window. It is a positive integer or a positive number with a unit. The former is the number of data points in each window. The last window may contain fewer data points. The latter is the time span of the window. The unit is 'ms' for millisecond, 's' for second, 'm' for minute, 'h' for hour and 'd' for day. By default, all input data belongs to the same window.
+
+**Output Series:** Output a single series. The type is DOUBLE. The range of each value is [0,1].
+
+**Note:** The calculation is performed only when the number of data points in the window exceeds 10. Otherwise, the window is ignored and nothing is output.
+
+
+
+### Examples
+
+#### Default Parameters
+
+With default parameters, this function will regard all input data as the same window.
+
+Input series:
+
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
++-----------------------------+---------------+
+```
+
+SQL for query:
+
+```sql
+select Validity(s1) from root.test.d1 where time <= 2020-01-01 00:00:30
+```
+
+Output series:
+
+```
++-----------------------------+-------------------------+
+|                         Time|validity(root.test.d1.s1)|
++-----------------------------+-------------------------+
+|2020-01-01T00:00:02.000+08:00|       0.8833333333333333|
++-----------------------------+-------------------------+
+```
+
+#### Specific Window Size
+
+When the window size is given, this function divides the input data into multiple windows.
+
+Input series:
+
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
+|2020-01-01T00:00:32.000+08:00|          130.0|
+|2020-01-01T00:00:34.000+08:00|          132.0|
+|2020-01-01T00:00:36.000+08:00|          134.0|
+|2020-01-01T00:00:38.000+08:00|          136.0|
+|2020-01-01T00:00:40.000+08:00|          138.0|
+|2020-01-01T00:00:42.000+08:00|          140.0|
+|2020-01-01T00:00:44.000+08:00|          142.0|
+|2020-01-01T00:00:46.000+08:00|          144.0|
+|2020-01-01T00:00:48.000+08:00|          146.0|
+|2020-01-01T00:00:50.000+08:00|          148.0|
+|2020-01-01T00:00:52.000+08:00|          150.0|
+|2020-01-01T00:00:54.000+08:00|          152.0|
+|2020-01-01T00:00:56.000+08:00|          154.0|
+|2020-01-01T00:00:58.000+08:00|          156.0|
+|2020-01-01T00:01:00.000+08:00|          158.0|
++-----------------------------+---------------+
+```
+
+SQL for query:
+
+```sql
+select Validity(s1,"window"="15") from root.test.d1 where time <= 2020-01-01 00:01:00
+```
+
+Output series:
+
+```
++-----------------------------+----------------------------------------+
+|                         Time|validity(root.test.d1.s1, "window"="15")|
++-----------------------------+----------------------------------------+
+|2020-01-01T00:00:02.000+08:00|                      0.8833333333333333|
+|2020-01-01T00:00:32.000+08:00|                                     1.0|
++-----------------------------+----------------------------------------+
+```
\ No newline at end of file
diff --git a/docs/UserGuide/Library-UDF/Get-Started.md b/docs/UserGuide/Library-UDF/Get-Started.md
new file mode 100644
index 0000000000..4f0f0d7223
--- /dev/null
+++ b/docs/UserGuide/Library-UDF/Get-Started.md
@@ -0,0 +1,61 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+# Get Started
+
+## About UDF Library
+
+For applications based on time series data, data quality is vital.
+**UDF Library** is a collection of IoTDB User Defined Functions (UDFs) for data quality, including data profiling, data quality evaluation and data repairing.
+It effectively meets the demand for data quality in the industrial field.
+
+### Contact
+
++ Email: iotdb-quality@protonmail.com
+
+## Quick Start
+
+1. Download the JAR with all dependencies and the script of registering UDF.
+2. Copy the JAR package to `ext\udf` under the directory of IoTDB system.
+3. Run `sbin\start-server.bat` (for Windows) or `sbin\start-server.sh` (for Linux or MacOS) to start IoTDB server.
+4. Copy the script to the directory of IoTDB system (under the root directory, at the same level as `sbin`), modify the parameters in the script if needed and run it to register UDF.
+
+## Download
+
+You can download the following files:
+
+<table>
+    <tr>
+        <th align="center">Version</th>
+        <th align="center">Jar with all dependencies</th>
+        <th align="center" colspan="2">Script of registering UDF</th>
+        <th align="center" colspan="2">User Manual</th>
+        <th align="center">Supported IoTDB Version</th>
+    </tr>
+    <tr>
+        <td align="center">In progress</td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/iotdb-quality-2.0.0-SNAPSHOT-jar-with-dependencies.jar">Jar with all dependencies</a></td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/register-UDF.bat">Windows</a></td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/register-UDF.sh">Linux/MacOS</a></td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/UserManual_en.pdf">English</a></td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/UserManual_zh.pdf">Chinese</a></td>
+        <td align="center">>= 0.12.0</td>
+    </tr>
+</table>
diff --git a/docs/zh/UserGuide/Library-UDF/Data-Quality.md b/docs/zh/UserGuide/Library-UDF/Data-Quality.md
new file mode 100644
index 0000000000..38cc3daf9b
--- /dev/null
+++ b/docs/zh/UserGuide/Library-UDF/Data-Quality.md
@@ -0,0 +1,510 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+# 数据质量
+## Completeness
+
+### 函数简介
+本函数用于计算时间序列的完整性。将输入序列划分为若干个连续且不重叠的窗口,分别计算每一个窗口的完整性,并输出窗口第一个数据点的时间戳和窗口的完整性。
+
+**函数名:** COMPLETENESS
+
+**输入序列:** 仅支持单个输入序列,类型为 INT32 / INT64 / FLOAT / DOUBLE。
+
+**参数:**
+
++ `window`:窗口大小,它是一个大于0的整数或者一个有单位的正数。前者代表每一个窗口包含的数据点数目,最后一个窗口的数据点数目可能会不足;后者代表窗口的时间跨度,目前支持五种单位,分别是'ms'(毫秒)、's'(秒)、'm'(分钟)、'h'(小时)和'd'(天)。缺省情况下,全部输入数据都属于同一个窗口。
++ `downtime`:完整性计算是否考虑停机异常。它的取值为 'true' 或 'false',默认值为 'true'。在考虑停机异常时,长时间的数据缺失将被视作停机,不对完整性产生影响。
+
+**输出序列:** 输出单个序列,类型为DOUBLE,其中每一个数据点的值的范围都是 [0,1].
+
+**提示:** 只有当窗口内的数据点数目超过10时,才会进行完整性计算。否则,该窗口将被忽略,不做任何输出。
+
+
+### 使用示例
+
+#### 参数缺省
+
+在参数缺省的情况下,本函数将会把全部输入数据都作为同一个窗口计算完整性。
+
+输入序列:
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
++-----------------------------+---------------+
+```
+
+用于查询的SQL语句:
+
+```sql
+select completeness(s1) from root.test.d1 where time <= 2020-01-01 00:00:30
+```
+
+输出序列:
+
+```
++-----------------------------+-----------------------------+
+|                         Time|completeness(root.test.d1.s1)|
++-----------------------------+-----------------------------+
+|2020-01-01T00:00:02.000+08:00|                        0.875|
++-----------------------------+-----------------------------+
+```
+
+#### 指定窗口大小
+
+在指定窗口大小的情况下,本函数会把输入数据划分为若干个窗口计算完整性。
+
+输入序列:
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
+|2020-01-01T00:00:32.000+08:00|          130.0|
+|2020-01-01T00:00:34.000+08:00|          132.0|
+|2020-01-01T00:00:36.000+08:00|          134.0|
+|2020-01-01T00:00:38.000+08:00|          136.0|
+|2020-01-01T00:00:40.000+08:00|          138.0|
+|2020-01-01T00:00:42.000+08:00|          140.0|
+|2020-01-01T00:00:44.000+08:00|          142.0|
+|2020-01-01T00:00:46.000+08:00|          144.0|
+|2020-01-01T00:00:48.000+08:00|          146.0|
+|2020-01-01T00:00:50.000+08:00|          148.0|
+|2020-01-01T00:00:52.000+08:00|          150.0|
+|2020-01-01T00:00:54.000+08:00|          152.0|
+|2020-01-01T00:00:56.000+08:00|          154.0|
+|2020-01-01T00:00:58.000+08:00|          156.0|
+|2020-01-01T00:01:00.000+08:00|          158.0|
++-----------------------------+---------------+
+```
+
+用于查询的 SQL 语句:
+
+```sql
+select completeness(s1,"window"="15") from root.test.d1 where time <= 2020-01-01 00:01:00
+```
+
+输出序列:
+
+```
++-----------------------------+--------------------------------------------+
+|                         Time|completeness(root.test.d1.s1, "window"="15")|
++-----------------------------+--------------------------------------------+
+|2020-01-01T00:00:02.000+08:00|                                       0.875|
+|2020-01-01T00:00:32.000+08:00|                                         1.0|
++-----------------------------+--------------------------------------------+
+```
+
+## Consistency
+
+### 函数简介
+本函数用于计算时间序列的一致性。将输入序列划分为若干个连续且不重叠的窗口,分别计算每一个窗口的一致性,并输出窗口第一个数据点的时间戳和窗口的一致性。
+
+**函数名:** CONSISTENCY
+
+**输入序列:** 仅支持单个输入序列,类型为 INT32 / INT64 / FLOAT / DOUBLE
+
+**参数:**
+
++ `window`:窗口大小,它是一个大于0的整数或者一个有单位的正数。前者代表每一个窗口包含的数据点数目,最后一个窗口的数据点数目可能会不足;后者代表窗口的时间跨度,目前支持五种单位,分别是 'ms'(毫秒)、's'(秒)、'm'(分钟)、'h'(小时)和'd'(天)。缺省情况下,全部输入数据都属于同一个窗口。
+
+**输出序列:** 输出单个序列,类型为DOUBLE,其中每一个数据点的值的范围都是 [0,1].
+
+**提示:** 只有当窗口内的数据点数目超过10时,才会进行一致性计算。否则,该窗口将被忽略,不做任何输出。
+
+
+### 使用示例
+
+#### 参数缺省
+
+在参数缺省的情况下,本函数将会把全部输入数据都作为同一个窗口计算一致性。
+
+输入序列:
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
++-----------------------------+---------------+
+```
+
+用于查询的SQL语句:
+
+```sql
+select consistency(s1) from root.test.d1 where time <= 2020-01-01 00:00:30
+```
+
+输出序列:
+
+```
++-----------------------------+----------------------------+
+|                         Time|consistency(root.test.d1.s1)|
++-----------------------------+----------------------------+
+|2020-01-01T00:00:02.000+08:00|          0.9333333333333333|
++-----------------------------+----------------------------+
+```
+
+#### 指定窗口大小
+
+在指定窗口大小的情况下,本函数会把输入数据划分为若干个窗口计算一致性。
+
+输入序列:
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
+|2020-01-01T00:00:32.000+08:00|          130.0|
+|2020-01-01T00:00:34.000+08:00|          132.0|
+|2020-01-01T00:00:36.000+08:00|          134.0|
+|2020-01-01T00:00:38.000+08:00|          136.0|
+|2020-01-01T00:00:40.000+08:00|          138.0|
+|2020-01-01T00:00:42.000+08:00|          140.0|
+|2020-01-01T00:00:44.000+08:00|          142.0|
+|2020-01-01T00:00:46.000+08:00|          144.0|
+|2020-01-01T00:00:48.000+08:00|          146.0|
+|2020-01-01T00:00:50.000+08:00|          148.0|
+|2020-01-01T00:00:52.000+08:00|          150.0|
+|2020-01-01T00:00:54.000+08:00|          152.0|
+|2020-01-01T00:00:56.000+08:00|          154.0|
+|2020-01-01T00:00:58.000+08:00|          156.0|
+|2020-01-01T00:01:00.000+08:00|          158.0|
++-----------------------------+---------------+
+```
+
+用于查询的SQL语句:
+
+```sql
+select consistency(s1,"window"="15") from root.test.d1 where time <= 2020-01-01 00:01:00
+```
+
+输出序列:
+
+```
++-----------------------------+-------------------------------------------+
+|                         Time|consistency(root.test.d1.s1, "window"="15")|
++-----------------------------+-------------------------------------------+
+|2020-01-01T00:00:02.000+08:00|                         0.9333333333333333|
+|2020-01-01T00:00:32.000+08:00|                                        1.0|
++-----------------------------+-------------------------------------------+
+```
+
+## Timeliness
+
+### 函数简介
+本函数用于计算时间序列的时效性。将输入序列划分为若干个连续且不重叠的窗口,分别计算每一个窗口的时效性,并输出窗口第一个数据点的时间戳和窗口的时效性。
+
+**函数名:** TIMELINESS
+
+**输入序列:** 仅支持单个输入序列,类型为 INT32 / INT64 / FLOAT / DOUBLE
+
+**参数:**
+
++ `window`:窗口大小,它是一个大于0的整数或者一个有单位的正数。前者代表每一个窗口包含的数据点数目,最后一个窗口的数据点数目可能会不足;后者代表窗口的时间跨度,目前支持五种单位,分别是 'ms'(毫秒)、's'(秒)、'm'(分钟)、'h'(小时)和'd'(天)。缺省情况下,全部输入数据都属于同一个窗口。
+
+**输出序列:** 输出单个序列,类型为DOUBLE,其中每一个数据点的值的范围都是 [0,1].
+
+**提示:** 只有当窗口内的数据点数目超过10时,才会进行时效性计算。否则,该窗口将被忽略,不做任何输出。
+
+
+### 使用示例
+
+#### 参数缺省
+
+在参数缺省的情况下,本函数将会把全部输入数据都作为同一个窗口计算时效性。
+
+输入序列:
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
++-----------------------------+---------------+
+```
+
+用于查询的SQL语句:
+
+```sql
+select timeliness(s1) from root.test.d1 where time <= 2020-01-01 00:00:30
+```
+
+输出序列:
+
+```
++-----------------------------+---------------------------+
+|                         Time|timeliness(root.test.d1.s1)|
++-----------------------------+---------------------------+
+|2020-01-01T00:00:02.000+08:00|         0.9333333333333333|
++-----------------------------+---------------------------+
+```
+
+#### 指定窗口大小
+
+在指定窗口大小的情况下,本函数会把输入数据划分为若干个窗口计算时效性。
+
+输入序列:
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
+|2020-01-01T00:00:32.000+08:00|          130.0|
+|2020-01-01T00:00:34.000+08:00|          132.0|
+|2020-01-01T00:00:36.000+08:00|          134.0|
+|2020-01-01T00:00:38.000+08:00|          136.0|
+|2020-01-01T00:00:40.000+08:00|          138.0|
+|2020-01-01T00:00:42.000+08:00|          140.0|
+|2020-01-01T00:00:44.000+08:00|          142.0|
+|2020-01-01T00:00:46.000+08:00|          144.0|
+|2020-01-01T00:00:48.000+08:00|          146.0|
+|2020-01-01T00:00:50.000+08:00|          148.0|
+|2020-01-01T00:00:52.000+08:00|          150.0|
+|2020-01-01T00:00:54.000+08:00|          152.0|
+|2020-01-01T00:00:56.000+08:00|          154.0|
+|2020-01-01T00:00:58.000+08:00|          156.0|
+|2020-01-01T00:01:00.000+08:00|          158.0|
++-----------------------------+---------------+
+```
+
+用于查询的SQL语句:
+
+```sql
+select timeliness(s1,"window"="15") from root.test.d1 where time <= 2020-01-01 00:01:00
+```
+
+输出序列:
+
+```
++-----------------------------+------------------------------------------+
+|                         Time|timeliness(root.test.d1.s1, "window"="15")|
++-----------------------------+------------------------------------------+
+|2020-01-01T00:00:02.000+08:00|                        0.9333333333333333|
+|2020-01-01T00:00:32.000+08:00|                                       1.0|
++-----------------------------+------------------------------------------+
+```
+
+## Validity
+
+### 函数简介
+本函数用于计算时间序列的有效性。将输入序列划分为若干个连续且不重叠的窗口,分别计算每一个窗口的有效性,并输出窗口第一个数据点的时间戳和窗口的有效性。
+
+
+**函数名:** VALIDITY
+
+**输入序列:** 仅支持单个输入序列,类型为 INT32 / INT64 / FLOAT / DOUBLE
+
+**参数:**
+
++ `window`:窗口大小,它是一个大于0的整数或者一个有单位的正数。前者代表每一个窗口包含的数据点数目,最后一个窗口的数据点数目可能会不足;后者代表窗口的时间跨度,目前支持五种单位,分别是 'ms'(毫秒)、's'(秒)、'm'(分钟)、'h'(小时)和'd'(天)。缺省情况下,全部输入数据都属于同一个窗口。
+
+**输出序列:** 输出单个序列,类型为DOUBLE,其中每一个数据点的值的范围都是 [0,1].
+
+**提示:** 只有当窗口内的数据点数目超过10时,才会进行有效性计算。否则,该窗口将被忽略,不做任何输出。
+
+
+### 使用示例
+
+#### 参数缺省
+
+在参数缺省的情况下,本函数将会把全部输入数据都作为同一个窗口计算有效性。
+
+输入序列:
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
++-----------------------------+---------------+
+```
+
+用于查询的SQL语句:
+
+```sql
+select validity(s1) from root.test.d1 where time <= 2020-01-01 00:00:30
+```
+
+输出序列:
+
+```
++-----------------------------+-------------------------+
+|                         Time|validity(root.test.d1.s1)|
++-----------------------------+-------------------------+
+|2020-01-01T00:00:02.000+08:00|       0.8833333333333333|
++-----------------------------+-------------------------+
+```
+
+#### 指定窗口大小
+
+在指定窗口大小的情况下,本函数会把输入数据划分为若干个窗口计算有效性。
+
+输入序列:
+```
++-----------------------------+---------------+
+|                         Time|root.test.d1.s1|
++-----------------------------+---------------+
+|2020-01-01T00:00:02.000+08:00|          100.0|
+|2020-01-01T00:00:03.000+08:00|          101.0|
+|2020-01-01T00:00:04.000+08:00|          102.0|
+|2020-01-01T00:00:06.000+08:00|          104.0|
+|2020-01-01T00:00:08.000+08:00|          126.0|
+|2020-01-01T00:00:10.000+08:00|          108.0|
+|2020-01-01T00:00:14.000+08:00|          112.0|
+|2020-01-01T00:00:15.000+08:00|          113.0|
+|2020-01-01T00:00:16.000+08:00|          114.0|
+|2020-01-01T00:00:18.000+08:00|          116.0|
+|2020-01-01T00:00:20.000+08:00|          118.0|
+|2020-01-01T00:00:22.000+08:00|          120.0|
+|2020-01-01T00:00:26.000+08:00|          124.0|
+|2020-01-01T00:00:28.000+08:00|          126.0|
+|2020-01-01T00:00:30.000+08:00|            NaN|
+|2020-01-01T00:00:32.000+08:00|          130.0|
+|2020-01-01T00:00:34.000+08:00|          132.0|
+|2020-01-01T00:00:36.000+08:00|          134.0|
+|2020-01-01T00:00:38.000+08:00|          136.0|
+|2020-01-01T00:00:40.000+08:00|          138.0|
+|2020-01-01T00:00:42.000+08:00|          140.0|
+|2020-01-01T00:00:44.000+08:00|          142.0|
+|2020-01-01T00:00:46.000+08:00|          144.0|
+|2020-01-01T00:00:48.000+08:00|          146.0|
+|2020-01-01T00:00:50.000+08:00|          148.0|
+|2020-01-01T00:00:52.000+08:00|          150.0|
+|2020-01-01T00:00:54.000+08:00|          152.0|
+|2020-01-01T00:00:56.000+08:00|          154.0|
+|2020-01-01T00:00:58.000+08:00|          156.0|
+|2020-01-01T00:01:00.000+08:00|          158.0|
++-----------------------------+---------------+
+```
+
+用于查询的SQL语句:
+
+```sql
+select validity(s1,"window"="15") from root.test.d1 where time <= 2020-01-01 00:01:00
+```
+
+输出序列:
+
+```
++-----------------------------+----------------------------------------+
+|                         Time|validity(root.test.d1.s1, "window"="15")|
++-----------------------------+----------------------------------------+
+|2020-01-01T00:00:02.000+08:00|                      0.8833333333333333|
+|2020-01-01T00:00:32.000+08:00|                                     1.0|
++-----------------------------+----------------------------------------+
+```
\ No newline at end of file
diff --git a/docs/zh/UserGuide/Library-UDF/Get-Started.md b/docs/zh/UserGuide/Library-UDF/Get-Started.md
new file mode 100644
index 0000000000..b282328849
--- /dev/null
+++ b/docs/zh/UserGuide/Library-UDF/Get-Started.md
@@ -0,0 +1,58 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+# 快速上手
+
+## 什么是 UDF 函数库
+
+对基于时序数据的应用而言,数据质量至关重要。**UDF 函数库** 基于 IoTDB 用户自定义函数 (UDF),实现了一系列关于数据质量的函数,包括数据画像、数据质量评估与修复等,有效满足了工业领域对数据质量的需求。
+
+### 联系我们
+
++ Email: iotdb-quality@protonmail.com
+
+## 快速开始
+1. 下载包含全部依赖的 jar 包和注册脚本;
+2. 将 jar 包复制到 IoTDB 程序目录的`ext\udf`目录下;
+3. 运行`sbin\start-server.bat`(在 Windows 下)或`sbin\start-server.sh`(在 Linux 或 MacOS 下)以启动 IoTDB 服务器;
+4. 将注册脚本复制到 IoTDB 的程序目录下(与`sbin`目录同级的根目录下),修改脚本中的参数(如果需要)并运行注册脚本以注册 UDF。
+
+## 下载
+
+您可以下载下列文件:
+
+<table>
+    <tr>
+        <th align="center">版本</th>
+        <th align="center">包含全部依赖的 Jar 包</th>
+        <th align="center" colspan="2">注册脚本</th>
+        <th align="center" colspan="2">用户手册</th>
+        <th align="center">支持的IoTDB版本</th>
+    </tr>
+    <tr>
+        <td align="center">In progress</td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/iotdb-quality-2.0.0-SNAPSHOT-jar-with-dependencies.jar">包含全部依赖的 Jar 包</a></td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/register-UDF.bat">Windows</a></td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/register-UDF.sh">Linux/MacOS</a></td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/UserManual_en.pdf">英文版</a></td>
+        <td align="center"><a href="https://thulab.github.io/iotdb-quality/download/UserManual_zh.pdf">中文版</a></td>
+        <td align="center">>= 0.12.0</td>
+    </tr>
+</table>
\ No newline at end of file
diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFCompleteness.java b/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFCompleteness.java
new file mode 100644
index 0000000000..db1d356632
--- /dev/null
+++ b/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFCompleteness.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dquality;
+
+import org.apache.iotdb.db.query.udf.api.UDTF;
+import org.apache.iotdb.db.query.udf.api.access.RowWindow;
+import org.apache.iotdb.db.query.udf.api.collector.PointCollector;
+import org.apache.iotdb.db.query.udf.api.customizer.config.UDTFConfigurations;
+import org.apache.iotdb.db.query.udf.api.customizer.parameter.UDFParameters;
+import org.apache.iotdb.db.query.udf.api.customizer.strategy.SlidingSizeWindowAccessStrategy;
+import org.apache.iotdb.db.query.udf.api.customizer.strategy.SlidingTimeWindowAccessStrategy;
+import org.apache.iotdb.library.dquality.util.TimeSeriesQuality;
+import org.apache.iotdb.library.util.Util;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * UDTF that reports the completeness of its input series, one value per window.
+ *
+ * <p>Attributes read in {@code beforeStart}: {@code window} (optional) and {@code downtime}
+ * (optional, defaults to {@code true}).
+ */
+public class UDTFCompleteness implements UDTF {
+  private boolean downtime;
+
+  /**
+   * Selects the window access strategy and declares DOUBLE as the output type.
+   *
+   * <p>Without a {@code window} attribute a size window of {@code Integer.MAX_VALUE} rows is used.
+   * Otherwise the attribute string is first handed to {@code Util.parseTime}: a positive result
+   * becomes the argument of a sliding time window, any other result makes the string be parsed
+   * with {@code Long.parseLong} and used as a row count.
+   */
+  @Override
+  public void beforeStart(UDFParameters udfp, UDTFConfigurations udtfc) throws Exception {
+    long span = Integer.MAX_VALUE;
+    boolean timeBased = false;
+    if (udfp.hasAttribute("window")) {
+      String raw = udfp.getString("window");
+      long parsedTime = Util.parseTime(raw);
+      timeBased = parsedTime > 0;
+      span = timeBased ? parsedTime : Long.parseLong(raw);
+    }
+    if (!timeBased) {
+      // NOTE(review): the row count is narrowed from long to int without a range check.
+      udtfc.setAccessStrategy(new SlidingSizeWindowAccessStrategy((int) span));
+    } else {
+      udtfc.setAccessStrategy(new SlidingTimeWindowAccessStrategy(span));
+    }
+    udtfc.setOutputDataType(TSDataType.DOUBLE);
+    downtime = udfp.getBooleanOrDefault("downtime", true);
+  }
+
+  /**
+   * Emits one point per window: the timestamp of the window's first row paired with the value of
+   * {@code TimeSeriesQuality.getCompleteness()}.
+   *
+   * <p>Windows whose size does not exceed {@code TimeSeriesQuality.windowSize} produce no output.
+   * An {@code IOException} is logged at SEVERE level and not rethrown.
+   */
+  @Override
+  public void transform(RowWindow rowWindow, PointCollector collector) throws Exception {
+    try {
+      if (rowWindow.windowSize() <= TimeSeriesQuality.windowSize) {
+        return;
+      }
+      TimeSeriesQuality quality = new TimeSeriesQuality(rowWindow.getRowIterator());
+      quality.setDowntime(downtime);
+      quality.timeDetect();
+      long firstTimestamp = rowWindow.getRow(0).getTime();
+      collector.putDouble(firstTimestamp, quality.getCompleteness());
+    } catch (IOException ioe) {
+      Logger.getLogger(UDTFCompleteness.class.getName()).log(Level.SEVERE, null, ioe);
+    }
+  }
+}
diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFConsistency.java b/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFConsistency.java
new file mode 100644
index 0000000000..b01a7f35a3
--- /dev/null
+++ b/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFConsistency.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dquality;
+
+import org.apache.iotdb.db.query.udf.api.UDTF;
+import org.apache.iotdb.db.query.udf.api.access.RowWindow;
+import org.apache.iotdb.db.query.udf.api.collector.PointCollector;
+import org.apache.iotdb.db.query.udf.api.customizer.config.UDTFConfigurations;
+import org.apache.iotdb.db.query.udf.api.customizer.parameter.UDFParameters;
+import org.apache.iotdb.db.query.udf.api.customizer.strategy.SlidingSizeWindowAccessStrategy;
+import org.apache.iotdb.db.query.udf.api.customizer.strategy.SlidingTimeWindowAccessStrategy;
+import org.apache.iotdb.library.dquality.util.TimeSeriesQuality;
+import org.apache.iotdb.library.util.NoNumberException;
+import org.apache.iotdb.library.util.Util;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * UDTF that reports the consistency of its input series, one value per window.
+ *
+ * <p>The only attribute read is the optional {@code window}.
+ */
+public class UDTFConsistency implements UDTF {
+
+  /**
+   * Selects the window access strategy and declares DOUBLE as the output type.
+   *
+   * <p>Without a {@code window} attribute a size window of {@code Integer.MAX_VALUE} rows is used.
+   * Otherwise the attribute string is first handed to {@code Util.parseTime}: a positive result
+   * becomes the argument of a sliding time window, any other result makes the string be parsed
+   * with {@code Long.parseLong} and used as a row count.
+   */
+  @Override
+  public void beforeStart(UDFParameters udfp, UDTFConfigurations udtfc) throws Exception {
+    boolean isTime = false;
+    long window = Integer.MAX_VALUE;
+    if (udfp.hasAttribute("window")) {
+      String s = udfp.getString("window");
+      window = Util.parseTime(s);
+      if (window > 0) {
+        isTime = true;
+      } else {
+        window = Long.parseLong(s);
+      }
+    }
+    if (isTime) {
+      udtfc.setAccessStrategy(new SlidingTimeWindowAccessStrategy(window));
+    } else {
+      // NOTE(review): the row count is narrowed from long to int without a range check.
+      udtfc.setAccessStrategy(new SlidingSizeWindowAccessStrategy((int) window));
+    }
+    udtfc.setOutputDataType(TSDataType.DOUBLE);
+  }
+
+  /**
+   * Emits one point per window: the timestamp of the window's first row paired with the value of
+   * {@code TimeSeriesQuality.getConsistency()}.
+   *
+   * <p>Windows whose size does not exceed {@code TimeSeriesQuality.windowSize} produce no output.
+   * An {@code IOException} or {@code NoNumberException} is logged at SEVERE level and not
+   * rethrown.
+   */
+  @Override
+  public void transform(RowWindow rowWindow, PointCollector collector) throws Exception {
+    try {
+      if (rowWindow.windowSize() > TimeSeriesQuality.windowSize) {
+        TimeSeriesQuality tsq = new TimeSeriesQuality(rowWindow.getRowIterator());
+        tsq.timeDetect();
+        collector.putDouble(rowWindow.getRow(0).getTime(), tsq.getConsistency());
+      }
+    } catch (IOException | NoNumberException ex) {
+      // Log under this class's own name so the failure is attributed to the right UDF.
+      Logger.getLogger(UDTFConsistency.class.getName()).log(Level.SEVERE, null, ex);
+    }
+  }
+}
diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFTimeliness.java b/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFTimeliness.java
new file mode 100644
index 0000000000..89552f4b16
--- /dev/null
+++ b/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFTimeliness.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dquality;
+
+import org.apache.iotdb.db.query.udf.api.UDTF;
+import org.apache.iotdb.db.query.udf.api.access.RowWindow;
+import org.apache.iotdb.db.query.udf.api.collector.PointCollector;
+import org.apache.iotdb.db.query.udf.api.customizer.config.UDTFConfigurations;
+import org.apache.iotdb.db.query.udf.api.customizer.parameter.UDFParameters;
+import org.apache.iotdb.db.query.udf.api.customizer.strategy.SlidingSizeWindowAccessStrategy;
+import org.apache.iotdb.db.query.udf.api.customizer.strategy.SlidingTimeWindowAccessStrategy;
+import org.apache.iotdb.library.dquality.util.TimeSeriesQuality;
+import org.apache.iotdb.library.util.NoNumberException;
+import org.apache.iotdb.library.util.Util;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * UDTF that reports the timeliness of its input series, one value per window.
+ *
+ * <p>The only attribute read is the optional {@code window}.
+ */
+public class UDTFTimeliness implements UDTF {
+  /**
+   * Selects the window access strategy and declares DOUBLE as the output type.
+   *
+   * <p>Without a {@code window} attribute a size window of {@code Integer.MAX_VALUE} rows is used.
+   * Otherwise the attribute string is first handed to {@code Util.parseTime}: a positive result
+   * becomes the argument of a sliding time window, any other result makes the string be parsed
+   * with {@code Long.parseLong} and used as a row count.
+   */
+  @Override
+  public void beforeStart(UDFParameters udfp, UDTFConfigurations udtfc) throws Exception {
+    boolean isTime = false;
+    long window = Integer.MAX_VALUE;
+    if (udfp.hasAttribute("window")) {
+      String s = udfp.getString("window");
+      window = Util.parseTime(s);
+      if (window > 0) {
+        isTime = true;
+      } else {
+        window = Long.parseLong(s);
+      }
+    }
+    if (isTime) {
+      udtfc.setAccessStrategy(new SlidingTimeWindowAccessStrategy(window));
+    } else {
+      // NOTE(review): the row count is narrowed from long to int without a range check.
+      udtfc.setAccessStrategy(new SlidingSizeWindowAccessStrategy((int) window));
+    }
+    udtfc.setOutputDataType(TSDataType.DOUBLE);
+  }
+
+  /**
+   * Emits one point per window: the timestamp of the window's first row paired with the value of
+   * {@code TimeSeriesQuality.getTimeliness()}.
+   *
+   * <p>Windows whose size does not exceed {@code TimeSeriesQuality.windowSize} produce no output.
+   * An {@code IOException} or {@code NoNumberException} is logged at SEVERE level and not
+   * rethrown.
+   */
+  @Override
+  public void transform(RowWindow rowWindow, PointCollector collector) throws Exception {
+    try {
+      if (rowWindow.windowSize() > TimeSeriesQuality.windowSize) {
+        TimeSeriesQuality tsq = new TimeSeriesQuality(rowWindow.getRowIterator());
+        tsq.timeDetect();
+        collector.putDouble(rowWindow.getRow(0).getTime(), tsq.getTimeliness());
+      }
+    } catch (IOException | NoNumberException ex) {
+      // Log under this class's own name so the failure is attributed to the right UDF.
+      Logger.getLogger(UDTFTimeliness.class.getName()).log(Level.SEVERE, null, ex);
+    }
+  }
+}
diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFValidity.java b/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFValidity.java
new file mode 100644
index 0000000000..99e3345c72
--- /dev/null
+++ b/library-udf/src/main/java/org/apache/iotdb/library/dquality/UDTFValidity.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dquality;
+
+import org.apache.iotdb.db.query.udf.api.UDTF;
+import org.apache.iotdb.db.query.udf.api.access.RowWindow;
+import org.apache.iotdb.db.query.udf.api.collector.PointCollector;
+import org.apache.iotdb.db.query.udf.api.customizer.config.UDTFConfigurations;
+import org.apache.iotdb.db.query.udf.api.customizer.parameter.UDFParameters;
+import org.apache.iotdb.db.query.udf.api.customizer.strategy.SlidingSizeWindowAccessStrategy;
+import org.apache.iotdb.db.query.udf.api.customizer.strategy.SlidingTimeWindowAccessStrategy;
+import org.apache.iotdb.library.dquality.util.TimeSeriesQuality;
+import org.apache.iotdb.library.util.NoNumberException;
+import org.apache.iotdb.library.util.Util;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * This function calculates validity of input series.
+ *
+ * <p>The optional {@code window} attribute selects how rows are grouped: when {@link
+ * Util#parseTime} returns a positive value for it, that value is used as the length of a sliding
+ * time window; otherwise the attribute is parsed as a number and used as the size of a sliding
+ * size window. Without the attribute a size window of {@code Integer.MAX_VALUE} rows is used. One
+ * DOUBLE point is emitted for every window holding more than {@link
+ * TimeSeriesQuality#windowSize} rows.
+ */
+public class UDTFValidity implements UDTF {
+  // Use this class's own logger so failures are attributed to the right UDF.
+  private static final Logger LOGGER = Logger.getLogger(UDTFValidity.class.getName());
+
+  /**
+   * Chooses the window access strategy from the {@code window} attribute and declares the DOUBLE
+   * output type.
+   *
+   * @param udfp parameters of the UDF call; only the {@code window} attribute is read
+   * @param udtfc configuration that receives the access strategy and the output type
+   * @throws Exception if the attribute can be parsed neither as a time nor as a number
+   */
+  @Override
+  public void beforeStart(UDFParameters udfp, UDTFConfigurations udtfc) throws Exception {
+    boolean isTime = false;
+    long window = Integer.MAX_VALUE;
+    if (udfp.hasAttribute("window")) {
+      String s = udfp.getString("window");
+      window = Util.parseTime(s);
+      if (window > 0) {
+        isTime = true;
+      } else {
+        // not recognised as a time expression: interpret the attribute as a row count
+        window = Long.parseLong(s);
+      }
+    }
+    if (isTime) {
+      udtfc.setAccessStrategy(new SlidingTimeWindowAccessStrategy(window));
+    } else {
+      udtfc.setAccessStrategy(new SlidingSizeWindowAccessStrategy((int) window));
+    }
+    udtfc.setOutputDataType(TSDataType.DOUBLE);
+  }
+
+  /**
+   * Computes the validity of one window and emits it at the timestamp of the window's first row.
+   * Windows with at most {@link TimeSeriesQuality#windowSize} rows produce no output.
+   *
+   * @param rowWindow the rows of the current window
+   * @param collector receives the resulting point
+   * @throws Exception for failures other than the logged {@link IOException} and {@link
+   *     NoNumberException}
+   */
+  @Override
+  public void transform(RowWindow rowWindow, PointCollector collector) throws Exception {
+    try {
+      if (rowWindow.windowSize() > TimeSeriesQuality.windowSize) {
+        TimeSeriesQuality tsq = new TimeSeriesQuality(rowWindow.getRowIterator());
+        tsq.valueDetect();
+        collector.putDouble(rowWindow.getRow(0).getTime(), tsq.getValidity());
+      }
+    } catch (IOException | NoNumberException ex) {
+      // best effort: a window that cannot be processed is logged and yields no point
+      LOGGER.log(Level.SEVERE, "Failed to compute validity", ex);
+    }
+  }
+}
diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dquality/util/TimeSeriesQuality.java b/library-udf/src/main/java/org/apache/iotdb/library/dquality/util/TimeSeriesQuality.java
new file mode 100644
index 0000000000..dfc31d21f6
--- /dev/null
+++ b/library-udf/src/main/java/org/apache/iotdb/library/dquality/util/TimeSeriesQuality.java
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dquality.util;
+
+import org.apache.iotdb.db.query.udf.api.access.Row;
+import org.apache.iotdb.db.query.udf.api.access.RowIterator;
+import org.apache.iotdb.library.util.Util;
+
+import org.apache.commons.math3.stat.descriptive.rank.Median;
+
+import java.io.File;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Scanner;
+
+/** Class for computing data quality index. */
+public class TimeSeriesQuality {
+  public static final int windowSize = 10;
+  private boolean downtime = true; // count for shutdown period
+  private int cnt = 0; // total number of points
+  private int missCnt = 0; // number of missing points
+  private int specialCnt = 0; // number of special values
+  private int lateCnt = 0; // number of latency points
+  private int redundancyCnt = 0; // number of redundancy points
+  private int valueCnt = 0; // number of out of range points
+  private int variationCnt = 0; // number of variation out of range points
+  private int speedCnt = 0; // number of speed out of range points
+  private int speedchangeCnt = 0; // number of speed change(acceleration) out of range points
+  private final double[] time; // series without special values
+  private final double[] origin; // series without special values
+
+  public TimeSeriesQuality(RowIterator dataIterator) throws Exception {
+    ArrayList<Double> timeList = new ArrayList<>();
+    ArrayList<Double> originList = new ArrayList<>();
+    while (dataIterator.hasNextRow()) {
+      Row row = dataIterator.next();
+      cnt++;
+      double v = Util.getValueAsDouble(row);
+      double t = Long.valueOf(row.getTime()).doubleValue();
+      if (Double.isFinite(v)) {
+        timeList.add(t);
+        originList.add(v);
+      } else { // processing NAN,INF
+        specialCnt++;
+        timeList.add(t);
+        originList.add(Double.NaN);
+      }
+    }
+    time = Util.toDoubleArray(timeList);
+    origin = Util.toDoubleArray(originList);
+    processNaN();
+  }
+
+  public TimeSeriesQuality(String filename) throws Exception {
+    Scanner sc = new Scanner(new File(filename));
+    ArrayList<Double> timeList = new ArrayList<>();
+    ArrayList<Double> originList = new ArrayList<>();
+    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    sc.useDelimiter("\\s*(,|\\r|\\n)\\s*"); // set separator
+    sc.nextLine();
+    while (sc.hasNext()) {
+      cnt++;
+      double t = format.parse(sc.next()).getTime();
+      double v = sc.nextDouble();
+      if (Double.isFinite(v)) {
+        timeList.add(t);
+        originList.add(v);
+      } else { // processing NAN,INF
+        specialCnt++;
+        timeList.add(t);
+        originList.add(Double.NaN);
+      }
+    }
+    time = Util.toDoubleArray(timeList);
+    origin = Util.toDoubleArray(originList);
+    processNaN();
+  }
+
+  /** linear interpolation of NaN */
+  private void processNaN() throws Exception {
+    int n = origin.length;
+    int index1 = 0;
+    int index2;
+    while (index1 < n && Double.isNaN(origin[index1])) {
+      index1++;
+    }
+    index2 = index1 + 1;
+    while (index2 < n && Double.isNaN(origin[index2])) {
+      index2++;
+    }
+    if (index2 >= n) {
+      throw new Exception("At least two non-NaN values are needed");
+    }
+    // interpolation at the beginning of the series
+    for (int i = 0; i < index2; i++) {
+      origin[i] =
+          origin[index1]
+              + (origin[index2] - origin[index1])
+                  * (time[i] - time[index1])
+                  / (time[index2] - time[index1]);
+    }
+    // interpolation at the middle of the series
+    for (int i = index2 + 1; i < n; i++) {
+      if (!Double.isNaN(origin[i])) {
+        index1 = index2;
+        index2 = i;
+        for (int j = index1 + 1; j < index2; j++) {
+          origin[j] =
+              origin[index1]
+                  + (origin[index2] - origin[index1])
+                      * (time[j] - time[index1])
+                      / (time[index2] - time[index1]);
+        }
+      }
+    }
+    // interpolation at the end of the series
+    for (int i = index2 + 1; i < n; i++) {
+      origin[i] =
+          origin[index1]
+              + (origin[index2] - origin[index1])
+                  * (time[i] - time[index1])
+                  / (time[index2] - time[index1]);
+    }
+  }
+
+  /** Detect timestamp errors */
+  public void timeDetect() {
+    // compute interval properties
+    double[] interval = Util.variation(time);
+    Median median = new Median();
+    double base = median.evaluate(interval);
+    // find timestamp anomalies
+    ArrayList<Double> window = new ArrayList<>();
+    int i;
+    for (i = 0; i < Math.min(time.length, windowSize); i++) { // fill initial data
+      window.add(time[i]);
+    }
+    while (window.size() > 1) {
+      double times = (window.get(1) - window.get(0)) / base;
+      if (times <= 0.5) { // delete over-concentrated points
+        window.remove(1);
+        redundancyCnt++;
+      } else if (times >= 2.0 && (!downtime || times <= 9.0)) { // exclude power-off periods
+        // large interval means missing or delaying
+        int temp = 0; // find number of over-concentrated points in the following window
+        for (int j = 2; j < window.size(); j++) {
+          double times2 = (window.get(j) - window.get(j - 1)) / base;
+          if (times2 >= 2.0) { // end searching when another missing is found
+            break;
+          }
+          if (times2 <= 0.5) { // over-concentrated points founded, maybe caused by delaying
+            temp++;
+            window.remove(j); // move delayed points
+            j--;
+            if (temp == (int) Math.round(times - 1)) {
+              break; // enough points to fill have been found
+            }
+          }
+        }
+        lateCnt += temp;
+        missCnt += (Math.round(times - 1) - temp);
+      }
+      window.remove(0); // remove processed points
+      while (window.size() < windowSize && i < time.length) {
+        // fill into the window
+        window.add(time[i]);
+        i++;
+      }
+    }
+  }
+
+  /** preparation for validity */
+  public void valueDetect() {
+    int k = 3;
+    valueCnt = findOutliers(origin, k);
+    // range anomaly
+    double[] variation = Util.variation(origin);
+    variationCnt = findOutliers(variation, k);
+    // speed anomaly
+    double[] speed = Util.speed(origin, time);
+    speedCnt = findOutliers(speed, k);
+    // acceleration anomaly
+    double[] speedchange = Util.variation(speed);
+    speedchangeCnt = findOutliers(speedchange, k);
+  }
+
+  /** return number of points lie out of median +- k * MAD */
+  private int findOutliers(double[] value, double k) {
+    Median median = new Median();
+    double mid = median.evaluate(value);
+    double sigma = Util.mad(value);
+    int num = 0;
+    for (double v : value) {
+      if (Math.abs(v - mid) > k * sigma) {
+        num++;
+      }
+    }
+    return num;
+  }
+
+  public double getCompleteness() {
+    return 1 - (missCnt + specialCnt) * 1.0 / (cnt + missCnt);
+  }
+
+  public double getConsistency() {
+    return 1 - redundancyCnt * 1.0 / cnt;
+  }
+
+  public double getTimeliness() {
+    return 1 - lateCnt * 1.0 / cnt;
+  }
+
+  public double getValidity() {
+    return 1 - (valueCnt + variationCnt + speedCnt + speedchangeCnt) * 0.25 / cnt;
+  }
+
+  /** @return the downtime */
+  public boolean isDowntime() {
+    return downtime;
+  }
+
+  /** @param downtime the downtime to set */
+  public void setDowntime(boolean downtime) {
+    this.downtime = downtime;
+  }
+}
diff --git a/library-udf/src/test/java/org/apache/iotdb/library/dquality/DQualityTests.java b/library-udf/src/test/java/org/apache/iotdb/library/dquality/DQualityTests.java
new file mode 100644
index 0000000000..1ef1f213c0
--- /dev/null
+++ b/library-udf/src/test/java/org/apache/iotdb/library/dquality/DQualityTests.java
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dquality;
+
+import org.apache.iotdb.db.conf.IoTDBDescriptor;
+import org.apache.iotdb.db.exception.metadata.MetadataException;
+import org.apache.iotdb.db.metadata.path.PartialPath;
+import org.apache.iotdb.db.service.IoTDB;
+import org.apache.iotdb.integration.env.ConfigFactory;
+import org.apache.iotdb.integration.env.EnvFactory;
+import org.apache.iotdb.jdbc.Config;
+import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+
+import static org.junit.Assert.fail;
+
+/**
+ * Integration tests for the data quality UDFs (completeness, timeliness, consistency, validity).
+ * Each test runs one UDF over one generated series and checks that the result lies in [0, 1].
+ */
+public class DQualityTests {
+  protected static final int ITERATION_TIMES = 10_000;
+  private static final float oldUdfCollectorMemoryBudgetInMB =
+      IoTDBDescriptor.getInstance().getConfig().getUdfCollectorMemoryBudgetInMB();
+  private static final float oldUdfTransformerMemoryBudgetInMB =
+      IoTDBDescriptor.getInstance().getConfig().getUdfTransformerMemoryBudgetInMB();
+  private static final float oldUdfReaderMemoryBudgetInMB =
+      IoTDBDescriptor.getInstance().getConfig().getUdfReaderMemoryBudgetInMB();
+
+  /** Lowers the UDF memory budgets, starts the environment, and prepares schema, data and UDFs. */
+  @BeforeClass
+  public static void setUp() throws Exception {
+    ConfigFactory.getConfig()
+        .setUdfCollectorMemoryBudgetInMB(5)
+        .setUdfTransformerMemoryBudgetInMB(5)
+        .setUdfReaderMemoryBudgetInMB(5);
+    EnvFactory.getEnv().initBeforeClass();
+    createTimeSeries();
+    generateData();
+    registerUDF();
+  }
+
+  /** Creates one series per numeric type: INT32, INT64, FLOAT and DOUBLE. */
+  private static void createTimeSeries() throws MetadataException {
+    IoTDB.metaManager.setStorageGroup(new PartialPath("root.vehicle"));
+    IoTDB.metaManager.createTimeseries(
+        new PartialPath("root.vehicle.d1.s1"),
+        TSDataType.INT32,
+        TSEncoding.PLAIN,
+        CompressionType.UNCOMPRESSED,
+        null);
+    IoTDB.metaManager.createTimeseries(
+        new PartialPath("root.vehicle.d1.s2"),
+        TSDataType.INT64,
+        TSEncoding.PLAIN,
+        CompressionType.UNCOMPRESSED,
+        null);
+    IoTDB.metaManager.createTimeseries(
+        new PartialPath("root.vehicle.d2.s1"),
+        TSDataType.FLOAT,
+        TSEncoding.PLAIN,
+        CompressionType.UNCOMPRESSED,
+        null);
+    IoTDB.metaManager.createTimeseries(
+        new PartialPath("root.vehicle.d2.s2"),
+        TSDataType.DOUBLE,
+        TSEncoding.PLAIN,
+        CompressionType.UNCOMPRESSED,
+        null);
+  }
+
+  /** Inserts {@link #ITERATION_TIMES} rows with random timestamps and values into d1 and d2. */
+  private static void generateData() {
+    double x = -100d, y = 100d; // borders of random value
+    long a = 0, b = 1000000000;
+    try (Connection connection =
+            DriverManager.getConnection(
+                Config.IOTDB_URL_PREFIX + "127.0.0.1:6667/", "root", "root");
+        Statement statement = connection.createStatement()) {
+      for (int i = 1; i <= ITERATION_TIMES; ++i) {
+        statement.execute(
+            String.format(
+                "insert into root.vehicle.d1(timestamp,s1,s2) values(%d,%d,%d)",
+                (int) Math.floor(a + Math.random() * b % (b - a + 1)),
+                (int) Math.floor(x + Math.random() * y % (y - x + 1)),
+                (int) Math.floor(x + Math.random() * y % (y - x + 1))));
+        statement.execute(
+            (String.format(
+                "insert into root.vehicle.d2(timestamp,s1,s2) values(%d,%f,%f)",
+                (int) Math.floor(a + Math.random() * b % (b - a + 1)),
+                x + Math.random() * y % (y - x + 1),
+                x + Math.random() * y % (y - x + 1))));
+      }
+    } catch (SQLException throwable) {
+      fail(throwable.getMessage());
+    }
+  }
+
+  /** Registers the four data quality UDFs under the names used by the queries below. */
+  private static void registerUDF() {
+    try (Connection connection = EnvFactory.getEnv().getConnection();
+        Statement statement = connection.createStatement()) {
+      statement.execute(
+          "create function completeness as 'org.apache.iotdb.library.dquality.UDTFCompleteness'");
+      statement.execute(
+          "create function timeliness as 'org.apache.iotdb.library.dquality.UDTFTimeliness'");
+      statement.execute(
+          "create function consistency as 'org.apache.iotdb.library.dquality.UDTFConsistency'");
+      statement.execute(
+          "create function validity as 'org.apache.iotdb.library.dquality.UDTFValidity'");
+    } catch (SQLException throwable) {
+      fail(throwable.getMessage());
+    }
+  }
+
+  /** Stops the environment and restores the UDF memory budgets captured at class load. */
+  @AfterClass
+  public static void tearDown() throws Exception {
+    EnvFactory.getEnv().cleanAfterClass();
+    ConfigFactory.getConfig()
+        .setUdfCollectorMemoryBudgetInMB(oldUdfCollectorMemoryBudgetInMB)
+        .setUdfTransformerMemoryBudgetInMB(oldUdfTransformerMemoryBudgetInMB)
+        .setUdfReaderMemoryBudgetInMB(oldUdfReaderMemoryBudgetInMB);
+  }
+
+  /**
+   * Runs {@code sqlStr} and asserts that its first result row holds a quality value in [0, 1].
+   *
+   * <p>The cursor is advanced with {@code next()} before reading, and the value is taken from
+   * column 2. NOTE(review): this relies on IoTDB result sets exposing the timestamp as column 1
+   * and the UDF output as column 2 - confirm against the JDBC driver in use.
+   */
+  private static void assertQualityInUnitRange(String sqlStr) {
+    try (Connection connection =
+            DriverManager.getConnection(
+                Config.IOTDB_URL_PREFIX + "127.0.0.1:6667/", "root", "root");
+        Statement statement = connection.createStatement();
+        ResultSet resultSet = statement.executeQuery(sqlStr)) {
+      Assert.assertTrue("query returned no rows: " + sqlStr, resultSet.next());
+      double result = resultSet.getDouble(2);
+      Assert.assertTrue(result >= -0.0D && result <= 1.0D);
+    } catch (SQLException throwable) {
+      fail(throwable.getMessage());
+    }
+  }
+
+  @Test
+  public void testCompleteness1() {
+    assertQualityInUnitRange("select completeness(d1.s1) from root.vehicle");
+  }
+
+  @Test
+  public void testCompleteness2() {
+    assertQualityInUnitRange("select completeness(d1.s2) from root.vehicle");
+  }
+
+  @Test
+  public void testCompleteness3() {
+    assertQualityInUnitRange("select completeness(d2.s1) from root.vehicle");
+  }
+
+  @Test
+  public void testCompleteness4() {
+    assertQualityInUnitRange("select completeness(d2.s2) from root.vehicle");
+  }
+
+  @Test
+  public void testTimeliness1() {
+    assertQualityInUnitRange("select timeliness(d1.s1) from root.vehicle");
+  }
+
+  @Test
+  public void testTimeliness2() {
+    assertQualityInUnitRange("select timeliness(d1.s2) from root.vehicle");
+  }
+
+  @Test
+  public void testTimeliness3() {
+    assertQualityInUnitRange("select timeliness(d2.s1) from root.vehicle");
+  }
+
+  @Test
+  public void testTimeliness4() {
+    assertQualityInUnitRange("select timeliness(d2.s2) from root.vehicle");
+  }
+
+  @Test
+  public void testConsistency1() {
+    assertQualityInUnitRange("select consistency(d1.s1) from root.vehicle");
+  }
+
+  @Test
+  public void testConsistency2() {
+    assertQualityInUnitRange("select consistency(d1.s2) from root.vehicle");
+  }
+
+  @Test
+  public void testConsistency3() {
+    assertQualityInUnitRange("select consistency(d2.s1) from root.vehicle");
+  }
+
+  @Test
+  public void testConsistency4() {
+    assertQualityInUnitRange("select consistency(d2.s2) from root.vehicle");
+  }
+
+  @Test
+  public void testValidity1() {
+    assertQualityInUnitRange("select validity(d1.s1) from root.vehicle");
+  }
+
+  @Test
+  public void testValidity2() {
+    assertQualityInUnitRange("select validity(d1.s2) from root.vehicle");
+  }
+
+  @Test
+  public void testValidity3() {
+    assertQualityInUnitRange("select validity(d2.s1) from root.vehicle");
+  }
+
+  @Test
+  public void testValidity4() {
+    assertQualityInUnitRange("select validity(d2.s2) from root.vehicle");
+  }
+}
diff --git a/site/src/main/.vuepress/config.js b/site/src/main/.vuepress/config.js
index 49ee7a536d..b7f03a8245 100644
--- a/site/src/main/.vuepress/config.js
+++ b/site/src/main/.vuepress/config.js
@@ -718,6 +718,13 @@ var config = {
 						]
 					},
 					{
+<<<<<<< HEAD
+					    title: 'UDF Library',
+					    children: [
+					        ['Library-UDF/Get-Started', 'Get Started'],
+					        ['Library-UDF/Data-Quality', 'Data Quality']
+					    ]
+=======
 						title: 'Write and Delete Data',
 						sidebarDepth: 1,
 						children: [
@@ -726,6 +733,7 @@ var config = {
 							['Write-And-Delete-Data/CSV-Tool','CSV Tool'],
 							['Write-And-Delete-Data/Delete-Data','Delete Data']
 						]
+>>>>>>> master
 					},
 					{
 						title: 'Query Data',
@@ -1637,6 +1645,13 @@ var config = {
 						]
 					},
 					{
+<<<<<<< HEAD
+          				title: 'UDF 函数库',
+          				children: [
+          					['Library-UDF/Get-started', '快速上手'],
+          					['Library-UDF/Data-Quality', '数据质量']
+          				]
+=======
 						title: '数据写入和删除',
 						sidebarDepth: 1,
 						children: [
@@ -1645,6 +1660,7 @@ var config = {
 							['Write-And-Delete-Data/CSV-Tool','导入导出 CSV'],
 							['Write-And-Delete-Data/Delete-Data','删除数据']
 						]
+>>>>>>> master
 					},
 					{
 						title: '数据查询',