You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@submarine.apache.org by pi...@apache.org on 2021/01/18 06:39:41 UTC
[submarine] branch master updated: SUBMARINE-426. [SDK] Add Convolutional Click Prediction Model

This is an automated email from the ASF dual-hosted git repository.

pingsutw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git


The following commit(s) were added to refs/heads/master by this push:
     new 3e0f41c  SUBMARINE-426. [SDK] Add Convolutional Click Prediction Model
3e0f41c is described below

commit 3e0f41c53b15e412dd5167b910f36a075c3744c6
Author: Lisa <ae...@gmail.com>
AuthorDate: Tue Jan 12 18:46:51 2021 +0800

    SUBMARINE-426. [SDK] Add Convolutional Click Prediction Model
    
    ### What is this PR for?
    Add TensorFlow implementation of Convolutional Click Prediction Model
    
    [CIKM 2015][A Convolutional Click Prediction Model](http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf)
    
    ### What type of PR is it?
    [Improvement]
    
    ### Todos
    * [ ] - Task
    
    ### What is the Jira issue?
    https://issues.apache.org/jira/browse/SUBMARINE-426
    
    ### How should this be tested?
    https://travis-ci.org/github/aeioulisa/submarine/builds/753005415
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Does the licenses files need update? No
    * Is there breaking changes for older versions? No
    * Does this needs documentation? No
    
    Author: Lisa <ae...@gmail.com>
    
    Closes #487 from aeioulisa/SUBMARINE-426 and squashes the following commits:
    
    793db57 [Lisa] Repair the import error
    a45ecfe [Lisa] remove redundant logs
    9e6e39c [Lisa] Fix code style
    5e39d7e [Lisa] Add parameters
    1b5e42f [Lisa] remove README.md
    633b1e6 [Lisa] Add Convolutional Click Prediction Model
---
 .../pysubmarine/example/tensorflow/ccpm/ccpm.json  | 36 ++++++++++++
 .../example/tensorflow/ccpm/ccpm_distributed.json  | 36 ++++++++++++
 .../tensorflow/ccpm/run_ccpm.py}                   | 22 ++++++--
 .../submarine/ml/tensorflow/layers/core.py         | 53 +++++++++++++++++
 .../submarine/ml/tensorflow/model/__init__.py      |  3 +-
 .../submarine/ml/tensorflow/model/ccpm.py          | 66 ++++++++++++++++++++++
 .../submarine/ml/tensorflow/parameters.py          |  2 +
 .../ml/tensorflow/model/test_ccpm.py}              | 13 +++--
 8 files changed, 222 insertions(+), 9 deletions(-)

diff --git a/submarine-sdk/pysubmarine/example/tensorflow/ccpm/ccpm.json b/submarine-sdk/pysubmarine/example/tensorflow/ccpm/ccpm.json
new file mode 100644
index 0000000..ad54a38
--- /dev/null
+++ b/submarine-sdk/pysubmarine/example/tensorflow/ccpm/ccpm.json
@@ -0,0 +1,36 @@
+{
+  "input": {
+    "train_data": ["../../data/tr.libsvm"],
+    "valid_data": ["../../data/va.libsvm"],
+    "test_data": ["../../data/te.libsvm"],
+    "type": "libsvm"
+  },
+  "output": {
+    "save_model_dir": "./experiment",
+    "metric": "auc"
+  },
+  "training": {
+    "batch_size" : 512,
+    "field_size": 39,
+    "num_epochs": 3,
+    "feature_size": 117581,
+    "embedding_size": 256,
+    "learning_rate": 0.0005,
+    "batch_norm_decay": 0.9,
+    "l2_reg": 0.0001,
+    "deep_layers": [400, 400, 400],
+    "conv_kernel_width": [6,5],
+    "conv_filters": [4,4],
+    "dropout": [0.3, 0.3, 0.3],
+    "batch_norm": false,
+    "optimizer": "adam",
+    "log_steps": 10,
+    "seed": 77,
+    "mode": "local"
+  },
+  "resource": {
+    "num_cpu": 4,
+    "num_gpu": 0,
+    "num_thread": 0
+  }
+}
diff --git a/submarine-sdk/pysubmarine/example/tensorflow/ccpm/ccpm_distributed.json b/submarine-sdk/pysubmarine/example/tensorflow/ccpm/ccpm_distributed.json
new file mode 100644
index 0000000..d4f93b0
--- /dev/null
+++ b/submarine-sdk/pysubmarine/example/tensorflow/ccpm/ccpm_distributed.json
@@ -0,0 +1,36 @@
+{
+  "input": {
+    "train_data": ["hdfs:///user/submarine/data/tr.libsvm"],
+    "valid_data": ["hdfs:///user/submarine/data/va.libsvm"],
+    "test_data": ["hdfs:///user/submarine/data/te.libsvm"],
+    "type": "libsvm"
+  },
+  "output": {
+    "save_model_dir": "hdfs:///user/submarine/deepfm",
+    "metric": "auc"
+  },
+  "training": {
+    "batch_size" : 512,
+    "field_size": 39,
+    "num_epochs": 3,
+    "feature_size": 117581,
+    "embedding_size": 256,
+    "learning_rate": 0.0005,
+    "batch_norm_decay": 0.9,
+    "l2_reg": 0.0001,
+    "deep_layers": [400, 400, 400],
+    "conv_kernel_width": [6,5],
+    "conv_filters": [4,4],
+    "dropout": [0.3, 0.3, 0.3],
+    "batch_norm": false,
+    "optimizer": "adam",
+    "log_steps": 10,
+    "seed": 77,
+    "mode": "distributed"
+  },
+  "resource": {
+    "num_cpu": 4,
+    "num_gpu": 0,
+    "num_thread": 0
+  }
+}
diff --git a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py b/submarine-sdk/pysubmarine/example/tensorflow/ccpm/run_ccpm.py
similarity index 55%
copy from submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py
copy to submarine-sdk/pysubmarine/example/tensorflow/ccpm/run_ccpm.py
index febeb99..88acfca 100644
--- a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py
+++ b/submarine-sdk/pysubmarine/example/tensorflow/ccpm/run_ccpm.py
@@ -13,8 +13,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .deepfm import DeepFM
-from .fm import FM
-from .nfm import NFM
+from submarine.ml.tensorflow.model import CCPM
+import argparse
 
-__all__ = ["DeepFM", "FM", "NFM"]
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-conf", help="a JSON configuration file for CCPM", type=str)
+    parser.add_argument("-task_type", default='train',
+                        help="train or evaluate, by default is train")
+    args = parser.parse_args()
+    json_path = args.conf
+    task_type = args.task_type
+
+    model = CCPM(json_path=json_path)
+
+    if task_type == 'train':
+        model.train()
+    if task_type == 'evaluate':
+        result = model.evaluate()
+        print("Model metrics : ", result)
diff --git a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/layers/core.py b/submarine-sdk/pysubmarine/submarine/ml/tensorflow/layers/core.py
index 8afb9e2..ec0f18f 100644
--- a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/layers/core.py
+++ b/submarine-sdk/pysubmarine/submarine/ml/tensorflow/layers/core.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import tensorflow as tf
+from tensorflow.keras.layers import Layer
 
 
 def batch_norm_layer(x, train_phase, scope_bn, batch_norm_decay):
@@ -181,3 +182,55 @@ def fm_layer(inputs, **kwargs):
         square_sum = tf.reduce_sum(tf.square(inputs), 1)
         fm_out = 0.5 * tf.reduce_sum(tf.subtract(sum_square, square_sum), 1)
     return fm_out
+
+
+class KMaxPooling(Layer):
+    """K Max pooling that selects the k biggest value along the specific axis.
+      Input shape
+        -  nD tensor with shape: ``(batch_size, ..., input_dim)``.
+      Output shape
+        - nD tensor with shape: ``(batch_size, ..., output_dim)``.
+      Arguments
+        - **k**: positive integer, number of top elements to look for along the ``axis`` dimension.
+        - **axis**: positive integer, the dimension to look for elements.
+     """
+
+    def __init__(self, k=1, axis=-1, **kwargs):
+
+        self.dims = 1
+        self.k = k
+        self.axis = axis
+        super(KMaxPooling, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+
+        if self.axis < 1 or self.axis > len(input_shape):
+            raise ValueError("axis must be 1~%d,now is %d" %
+                             (len(input_shape), self.axis))
+
+        if self.k < 1 or self.k > input_shape[self.axis]:
+            raise ValueError("k must be in 1 ~ %d,now k is %d" %
+                             (input_shape[self.axis], self.k))
+        self.dims = len(input_shape)
+        super(KMaxPooling, self).build(input_shape)
+
+    def call(self, inputs):
+
+        perm = list(range(self.dims))
+        perm[-1], perm[self.axis] = perm[self.axis], perm[-1]
+        shifted_input = tf.transpose(inputs, perm)
+
+        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
+        output = tf.transpose(top_k, perm)
+
+        return output
+
+    def compute_output_shape(self, input_shape):
+        output_shape = list(input_shape)
+        output_shape[self.axis] = self.k
+        return tuple(output_shape)
+
+    def get_config(self, ):
+        config = {'k': self.k, 'axis': self.axis}
+        base_config = super(KMaxPooling, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py b/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py
index febeb99..7f561f9 100644
--- a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py
+++ b/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py
@@ -16,5 +16,6 @@
 from .deepfm import DeepFM
 from .fm import FM
 from .nfm import NFM
+from .ccpm import CCPM
 
-__all__ = ["DeepFM", "FM", "NFM"]
+__all__ = ["DeepFM", "FM", "NFM", "CCPM"]
diff --git a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/ccpm.py b/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/ccpm.py
new file mode 100644
index 0000000..de41adf
--- /dev/null
+++ b/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/ccpm.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import tensorflow as tf
+
+from submarine.ml.tensorflow.layers.core import (dnn_layer, embedding_layer, linear_layer,
+                                                 KMaxPooling)
+from submarine.ml.tensorflow.model.base_tf_model import BaseTFModel
+from submarine.utils.tf_utils import get_estimator_spec
+
+logger = logging.getLogger(__name__)
+
+
+class CCPM(BaseTFModel):
+    def model_fn(self, features, labels, mode, params):
+        super().model_fn(features, labels, mode, params)
+
+        if len(params['training']['conv_kernel_width']) != len(params['training']['conv_filters']):
+            raise ValueError(
+                "conv_kernel_width must have same element with conv_filters")
+
+        linear_logit = linear_layer(features, **params['training'])
+        embedding_outputs = embedding_layer(features, **params['training'])
+        conv_filters = params['training']['conv_filters']
+        conv_kernel_width = params['training']['conv_kernel_width']
+
+        n = params['training']['embedding_size']
+        conv_filters_len = len(conv_filters)
+        conv_input = tf.concat(embedding_outputs, axis=1)
+
+        pooling_result = tf.keras.layers.Lambda(
+            lambda x: tf.expand_dims(x, axis=3))(conv_input)
+
+        for i in range(1, conv_filters_len + 1):
+            filters = conv_filters[i - 1]
+            width = conv_kernel_width[i - 1]
+            p = pow(i / conv_filters_len, conv_filters_len - i)
+            k = max(1, int((1 - p) * n)) if i < conv_filters_len else 3
+
+            conv_result = tf.keras.layers.Conv2D(filters=filters, kernel_size=(width, 1),
+                                                 strides=(1, 1), padding='same',
+                                                 activation='tanh', use_bias=True, )(pooling_result)
+
+            pooling_result = KMaxPooling(
+                k=min(k, int(conv_result.shape[1])), axis=1)(conv_result)
+
+        flatten_result = tf.keras.layers.Flatten()(pooling_result)
+        deep_logit = dnn_layer(flatten_result, mode, **params['training'])
+
+        with tf.variable_scope("CCPM_out"):
+            logit = linear_logit + deep_logit
+
+        return get_estimator_spec(logit, labels, mode, params)
diff --git a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/parameters.py b/submarine-sdk/pysubmarine/submarine/ml/tensorflow/parameters.py
index a35312d..0815a2d 100644
--- a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/parameters.py
+++ b/submarine-sdk/pysubmarine/submarine/ml/tensorflow/parameters.py
@@ -28,6 +28,8 @@ default_parameters = {
         "batch_norm_decay": 0.9,
         "l2_reg": 0.0001,
         "deep_layers": [400, 400, 400],
+        "conv_kernel_width": [6, 5],
+        "conv_filters": [4, 4],
         "dropout": [0.3, 0.3, 0.3],
         "batch_norm": "false",
         "optimizer": "adam",
diff --git a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py b/submarine-sdk/pysubmarine/tests/ml/tensorflow/model/test_ccpm.py
similarity index 78%
copy from submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py
copy to submarine-sdk/pysubmarine/tests/ml/tensorflow/model/test_ccpm.py
index febeb99..536c049 100644
--- a/submarine-sdk/pysubmarine/submarine/ml/tensorflow/model/__init__.py
+++ b/submarine-sdk/pysubmarine/tests/ml/tensorflow/model/test_ccpm.py
@@ -13,8 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .deepfm import DeepFM
-from .fm import FM
-from .nfm import NFM
+from submarine.ml.tensorflow.model import CCPM
 
-__all__ = ["DeepFM", "FM", "NFM"]
+
+def test_run_ccpm(get_model_param):
+    params = get_model_param
+
+    model = CCPM(model_params=params)
+    model.train()
+    model.evaluate()
+    model.predict()


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@submarine.apache.org
For additional commands, e-mail: dev-help@submarine.apache.org