You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by zh...@apache.org on 2023/01/30 09:19:52 UTC
[flink-ml] branch master updated: [FLINK-30568] Add benchmark for PolynomialExpansion, Normalizer, Binarizer, Interaction, MaxAbsScaler, VectorSlicer, ElementwiseProduct and FeatureHasher
This is an automated email from the ASF dual-hosted git repository.
zhangzp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git
The following commit(s) were added to refs/heads/master by this push:
new 169aca4 [FLINK-30568] Add benchmark for PolynomialExpansion, Normalizer, Binarizer, Interaction, MaxAbsScaler, VectorSlicer, ElementwiseProduct and FeatureHasher
169aca4 is described below
commit 169aca44fa6541d9b230e7e8a4c5b146a35cda67
Author: weibo <wb...@pku.edu.cn>
AuthorDate: Mon Jan 30 17:19:46 2023 +0800
[FLINK-30568] Add benchmark for PolynomialExpansion, Normalizer, Binarizer, Interaction, MaxAbsScaler, VectorSlicer, ElementwiseProduct and FeatureHasher
This closes #198.
---
.../src/main/resources/binarizer-benchmark.json | 62 ++++++++++++++++++++++
.../resources/elementwiseproduct-benchmark.json | 39 ++++++++++++++
.../main/resources/featurehasher-benchmark.json | 54 +++++++++++++++++++
.../src/main/resources/interaction-benchmark.json | 48 +++++++++++++++++
.../src/main/resources/maxabsscaler-benchmark.json | 36 +++++++++++++
.../src/main/resources/normalizer-benchmark.json | 39 ++++++++++++++
.../resources/polynoimalexpansion-benchmark.json | 39 ++++++++++++++
.../src/main/resources/vectorslicer-benchmark.json | 39 ++++++++++++++
8 files changed, 356 insertions(+)
diff --git a/flink-ml-benchmark/src/main/resources/binarizer-benchmark.json b/flink-ml-benchmark/src/main/resources/binarizer-benchmark.json
new file mode 100644
index 0000000..fc0a5fd
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/binarizer-benchmark.json
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+ "version": 1,
+ "binarizer10000000": {
+ "inputData": {
+ "className": "org.apache.flink.ml.benchmark.datagenerator.common.DoubleGenerator",
+ "paramMap": {
+ "colNames": [
+ [
+ "f0",
+ "f1",
+ "f2",
+ "f3",
+ "f4"
+ ]
+ ],
+ "seed": 2,
+ "numValues": 10000000
+ }
+ },
+ "stage": {
+ "className": "org.apache.flink.ml.feature.binarizer.Binarizer",
+ "paramMap": {
+ "inputCols": [
+ "f0",
+ "f1",
+ "f2",
+ "f3",
+ "f4"
+ ],
+ "outputCols": [
+ "outputCol0",
+ "outputCol1",
+ "outputCol2",
+ "outputCol3",
+ "outputCol4"
+ ],
+ "thresholds": [
+ 0.5,
+ 0.3,
+ 0.3,
+ 0.6,
+ 0.8
+ ]
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/flink-ml-benchmark/src/main/resources/elementwiseproduct-benchmark.json b/flink-ml-benchmark/src/main/resources/elementwiseproduct-benchmark.json
new file mode 100644
index 0000000..740799a
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/elementwiseproduct-benchmark.json
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+ "version": 1,
+ "elementwiseproduct10000000": {
+ "inputData": {
+ "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator",
+ "paramMap": {
+ "vectorDim": 5,
+ "colNames": [
+ [
+ "featuresCol"
+ ]
+ ],
+ "seed": 2,
+ "numValues": 10000000
+ }
+ },
+ "stage": {
+ "className": "org.apache.flink.ml.feature.elementwiseproduct.ElementwiseProduct",
+ "paramMap": {
+ "scalingVec": {"values": [1.0, 2.0, 3.0, 4.0, 5.0]}
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/flink-ml-benchmark/src/main/resources/featurehasher-benchmark.json b/flink-ml-benchmark/src/main/resources/featurehasher-benchmark.json
new file mode 100644
index 0000000..5c3826f
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/featurehasher-benchmark.json
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+ "version": 1,
+ "featurehasher10000000": {
+ "inputData": {
+ "className": "org.apache.flink.ml.benchmark.datagenerator.common.DoubleGenerator",
+ "paramMap": {
+ "colNames": [
+ [
+ "f0",
+ "f1",
+ "f2",
+ "f3",
+ "f4"
+ ]
+ ],
+ "seed": 2,
+ "numValues": 10000000
+ }
+ },
+ "stage": {
+ "className": "org.apache.flink.ml.feature.featurehasher.FeatureHasher",
+ "paramMap": {
+ "inputCols": [
+ "f0",
+ "f1",
+ "f2",
+ "f3",
+ "f4"
+ ],
+ "categoricalCols": [
+ "f0",
+ "f1",
+ "f2"
+ ],
+ "numFeatures": 1000
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/flink-ml-benchmark/src/main/resources/interaction-benchmark.json b/flink-ml-benchmark/src/main/resources/interaction-benchmark.json
new file mode 100644
index 0000000..01bb8bb
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/interaction-benchmark.json
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+ "version": 1,
+ "interaction10000000": {
+ "inputData": {
+ "className": "org.apache.flink.ml.benchmark.datagenerator.common.DoubleGenerator",
+ "paramMap": {
+ "colNames": [
+ [
+ "f0",
+ "f1",
+ "f2",
+ "f3",
+ "f4"
+ ]
+ ],
+ "seed": 2,
+ "numValues": 10000000
+ }
+ },
+ "stage": {
+ "className": "org.apache.flink.ml.feature.interaction.Interaction",
+ "paramMap": {
+ "inputCols": [
+ "f0",
+ "f1",
+ "f2",
+ "f3",
+ "f4"
+ ]
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/flink-ml-benchmark/src/main/resources/maxabsscaler-benchmark.json b/flink-ml-benchmark/src/main/resources/maxabsscaler-benchmark.json
new file mode 100644
index 0000000..9cbe218
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/maxabsscaler-benchmark.json
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+ "version": 1,
+ "maxabsscaler10000000": {
+ "inputData": {
+ "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator",
+ "paramMap": {
+ "vectorDim": 100,
+ "colNames": [
+ [
+ "featuresCol"
+ ]
+ ],
+ "seed": 2,
+ "numValues": 10000000
+ }
+ },
+ "stage": {
+ "className": "org.apache.flink.ml.feature.maxabsscaler.MaxAbsScaler"
+ }
+ }
+}
\ No newline at end of file
diff --git a/flink-ml-benchmark/src/main/resources/normalizer-benchmark.json b/flink-ml-benchmark/src/main/resources/normalizer-benchmark.json
new file mode 100644
index 0000000..77d1f9e
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/normalizer-benchmark.json
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+ "version": 1,
+ "normalizer10000000": {
+ "inputData": {
+ "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator",
+ "paramMap": {
+ "vectorDim": 5,
+ "colNames": [
+ [
+ "featuresCol"
+ ]
+ ],
+ "seed": 2,
+ "numValues": 10000000
+ }
+ },
+ "stage": {
+ "className": "org.apache.flink.ml.feature.normalizer.Normalizer",
+ "paramMap": {
+ "p": 2.0
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/flink-ml-benchmark/src/main/resources/polynoimalexpansion-benchmark.json b/flink-ml-benchmark/src/main/resources/polynoimalexpansion-benchmark.json
new file mode 100644
index 0000000..d35a45b
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/polynoimalexpansion-benchmark.json
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+ "version": 1,
+ "polynoimalexpansion10000000": {
+ "inputData": {
+ "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator",
+ "paramMap": {
+ "vectorDim": 5,
+ "colNames": [
+ [
+ "featuresCol"
+ ]
+ ],
+ "seed": 2,
+ "numValues": 10000000
+ }
+ },
+ "stage": {
+ "className": "org.apache.flink.ml.feature.polynomialexpansion.PolynomialExpansion",
+ "paramMap": {
+ "degree": 2
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/flink-ml-benchmark/src/main/resources/vectorslicer-benchmark.json b/flink-ml-benchmark/src/main/resources/vectorslicer-benchmark.json
new file mode 100644
index 0000000..7f77166
--- /dev/null
+++ b/flink-ml-benchmark/src/main/resources/vectorslicer-benchmark.json
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+ "version": 1,
+ "vectorslicer10000000": {
+ "inputData": {
+ "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator",
+ "paramMap": {
+ "vectorDim": 10,
+ "colNames": [
+ [
+ "featuresCol"
+ ]
+ ],
+ "seed": 2,
+ "numValues": 10000000
+ }
+ },
+ "stage": {
+ "className": "org.apache.flink.ml.feature.vectorslicer.VectorSlicer",
+ "paramMap": {
+ "indices": [1, 3, 5, 7]
+ }
+ }
+ }
+}
\ No newline at end of file