You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by li...@apache.org on 2023/01/06 06:59:23 UTC

[flink-ml] 02/02: [FLINK-30532] Add benchmark for DCT, SQLTransformer and StopWordsRemover

This is an automated email from the ASF dual-hosted git repository.

lindong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit 3bd19a22c5005be4a7f086a357b2d52f7a0f6905
Author: yunfengzhou-hub <yu...@outlook.com>
AuthorDate: Thu Dec 29 09:47:52 2022 +0800

    [FLINK-30532] Add benchmark for DCT, SQLTransformer and StopWordsRemover
    
    This closes #195.
---
 .../src/main/resources/dct-benchmark.json          | 36 +++++++++++++++++
 .../main/resources/sqltransformer-benchmark.json   | 38 ++++++++++++++++++
 .../main/resources/stopwordsremover-benchmark.json | 45 ++++++++++++++++++++++
 3 files changed, 119 insertions(+)

diff --git a/flink-ml-benchmark/src/main/resources/dct-benchmark.json b/flink-ml-benchmark/src/main/resources/dct-benchmark.json
new file mode 100644
index 0000000..684a7cd
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/dct-benchmark.json
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+  "version": 1,
+  "dct10000000": {
+    "inputData": {
+      "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator",
+      "paramMap": {
+        "colNames": [
+          [
+            "input"
+          ]
+        ],
+        "seed": 2,
+        "numValues": 10000000,
+        "vectorDim": 100
+      }
+    },
+    "stage": {
+      "className": "org.apache.flink.ml.feature.dct.DCT"
+    }
+  }
+}
diff --git a/flink-ml-benchmark/src/main/resources/sqltransformer-benchmark.json b/flink-ml-benchmark/src/main/resources/sqltransformer-benchmark.json
new file mode 100644
index 0000000..5265ecf
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/sqltransformer-benchmark.json
@@ -0,0 +1,38 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+  "version": 1,
+  "sqltransformer100000000": {
+    "inputData": {
+      "className": "org.apache.flink.ml.benchmark.datagenerator.common.DoubleGenerator",
+      "paramMap": {
+        "colNames": [
+          [
+            "v1"
+          ]
+        ],
+        "seed": 2,
+        "numValues": 100000000
+      }
+    },
+    "stage": {
+      "className": "org.apache.flink.ml.feature.sqltransformer.SQLTransformer",
+      "paramMap": {
+        "statement": "SELECT *, ABS(v1) AS v2 FROM __THIS__"
+      }
+    }
+  }
+}
diff --git a/flink-ml-benchmark/src/main/resources/stopwordsremover-benchmark.json b/flink-ml-benchmark/src/main/resources/stopwordsremover-benchmark.json
new file mode 100644
index 0000000..cd1dde9
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/stopwordsremover-benchmark.json
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+  "version": 1,
+  "stopwordsremover1000000": {
+    "inputData": {
+      "className": "org.apache.flink.ml.benchmark.datagenerator.common.RandomStringArrayGenerator",
+      "paramMap": {
+        "colNames": [
+          [
+            "input"
+          ]
+        ],
+        "seed": 2,
+        "numValues": 1000000,
+        "numDistinctValues": 100,
+        "arraySize": 100
+      }
+    },
+    "stage": {
+      "className": "org.apache.flink.ml.feature.stopwordsremover.StopWordsRemover",
+      "paramMap": {
+        "inputCols": [
+          "input"
+        ],
+        "outputCols": [
+          "output"
+        ]
+      }
+    }
+  }
+}