You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2016/06/08 21:40:41 UTC

[44/51] [partial] mahout git commit: (nojira) add native-viennaCL module to codebase. closes apache/mahout#241

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp
new file mode 100644
index 0000000..aec9043
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp
@@ -0,0 +1,84 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_AMD_SOUTHERN_ISLANDS_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_AMD_SOUTHERN_ISLANDS_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace amd{
+namespace southern_islands{
+namespace tahiti{ // matrix_product_template parameter sets registered under the device name "Tahiti" (amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands)
+/** @brief ('T','T') tag pair: adds the 8B parameter set for "Tahiti" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,32,16,8,1,1,16,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16));
+}
+/** @brief ('T','N') tag pair: adds the 8B parameter set for "Tahiti" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,16,16,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,4,64));
+}
+/** @brief ('N','T') tag pair: adds the 8B parameter set for "Tahiti" to db via db.add_8B (both fetch policies are global, last two arguments are 0). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(2,8,2,16,4,2,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief ('N','N') tag pair: adds the 8B parameter set for "Tahiti" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,16,4,4,4,2,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,4,16));
+}
+/** @brief ('T','T') tag pair: adds the 4B parameter set for "Tahiti" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,8,32,32,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8));
+}
+/** @brief ('T','N') tag pair: adds the 4B parameter set for "Tahiti" to db via db.add_4B (differs from the ('T','T') set only in the 7th argument). */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,8,32,32,4,1,2,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8));
+}
+/** @brief ('N','T') tag pair: adds the 4B parameter set for "Tahiti" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,64,32,4,4,2,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,16,16));
+}
+/** @brief ('N','N') tag pair: adds the 4B parameter set for "Tahiti" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,128,32,2,2,1,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,32,8));
+}
+// NOTE(review): the char_to_type tags presumably say whether each product operand is transposed ('T') or not ('N'), and 4B/8B presumably the scalar size in bytes - confirm against database_type and the callers.
+// NOTE(review): this header's include guard is named ..._AMD_SOUTHERN_ISLANDS_HPP_ (directory level) although the file is tahiti.hpp - confirm no sibling header reuses the same guard.
+} // namespace tahiti
+} // namespace southern_islands
+} // namespace amd
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp
new file mode 100644
index 0000000..c2674f0
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp
@@ -0,0 +1,84 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_AMD_VOLCANIC_ISLANDS_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_AMD_VOLCANIC_ISLANDS_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace amd{
+namespace volcanic_islands{
+namespace hawaii{ // matrix_product_template parameter sets registered under the device name "Hawaii" (amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands)
+/** @brief ('T','T') tag pair: adds the 8B parameter set for "Hawaii" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,8,16,32,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16));
+}
+/** @brief ('T','N') tag pair: adds the 8B parameter set for "Hawaii" to db via db.add_8B (same tuple as the ('T','T') overload). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,8,16,32,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16));
+}
+/** @brief ('N','T') tag pair: adds the 8B parameter set for "Hawaii" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(2,8,8,8,6,1,6,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8));
+}
+/** @brief ('N','N') tag pair: adds the 8B parameter set for "Hawaii" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,16,16,16,2,1,8,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16));
+}
+/** @brief ('T','T') tag pair: adds the 4B parameter set for "Hawaii" to db via db.add_4B (both fetch policies are global, last two arguments are 0). */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,1,2,64,8,2,4,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_GLOBAL_STRIDED,0,0));
+}
+/** @brief ('T','N') tag pair: adds the 4B parameter set for "Hawaii" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,16,16,16,2,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16));
+}
+/** @brief ('N','T') tag pair: adds the 4B parameter set for "Hawaii" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(2,16,16,16,6,1,6,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16));
+}
+/** @brief ('N','N') tag pair: adds the 4B parameter set for "Hawaii" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,64,64,4,2,4,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,64,4));
+}
+// NOTE(review): the char_to_type tags presumably say whether each product operand is transposed ('T') or not ('N'), and 4B/8B presumably the scalar size in bytes - confirm against database_type and the callers.
+// NOTE(review): this header's include guard is named ..._AMD_VOLCANIC_ISLANDS_HPP_ (directory level) although the file is hawaii.hpp - confirm no sibling header reuses the same guard.
+} // namespace hawaii
+} // namespace volcanic_islands
+} // namespace amd
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp
new file mode 100644
index 0000000..ff307f3
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp
@@ -0,0 +1,84 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_FALLBACK_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_FALLBACK_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace fallback{ // matrix_product_template parameter sets registered with unknown_id / unknown and an empty device name for CL_DEVICE_TYPE_GPU
+// NOTE(review): presumably these entries are the ones picked when no named GPU entry matches - confirm against the database lookup code.
+/** @brief ('N','N') tag pair: adds the 4B fallback parameter set to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_4B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8));
+}
+/** @brief ('T','N') tag pair: adds the 4B fallback parameter set to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_4B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8));
+}
+/** @brief ('N','T') tag pair: adds the 4B fallback parameter set to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_4B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8));
+}
+/** @brief ('T','T') tag pair: adds the 4B fallback parameter set to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_4B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8));
+}
+
+/** @brief ('N','N') tag pair: adds the 8B fallback parameter set to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_8B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8));
+}
+/** @brief ('T','N') tag pair: adds the 8B fallback parameter set to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_8B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8));
+}
+/** @brief ('N','T') tag pair: adds the 8B fallback parameter set to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_8B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8));
+}
+/** @brief ('T','T') tag pair: adds the 8B fallback parameter set to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_8B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8));
+}
+// All eight overloads above pass the identical tuple (1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8); only the tag pair and add_4B/add_8B differ.
+
+} // namespace fallback
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+
+
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp
new file mode 100644
index 0000000..24c02b7
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp
@@ -0,0 +1,59 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GT540M_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GT540M_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace nvidia{
+namespace fermi{
+namespace geforce_gt_540m{ // matrix_product_template parameter sets registered under the device name "GeForce GT 540M" (nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi)
+/** @brief ('T','N') tag pair: adds the 4B parameter set for "GeForce GT 540M" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GT 540M", matrix_product_template::parameters_type(1, 16, 16, 8, 4, 1, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 16, 8));
+}
+/** @brief ('N','T') tag pair: adds the 4B parameter set for "GeForce GT 540M" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GT 540M", matrix_product_template::parameters_type(1, 16, 16, 16, 8, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 32, 8));
+}
+/** @brief ('N','N') tag pair: adds the 4B parameter set for "GeForce GT 540M" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GT 540M", matrix_product_template::parameters_type(1, 8, 16, 16, 8, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 16, 8));
+}
+// NOTE(review): the char_to_type tags presumably say whether each product operand is transposed ('T') or not ('N'), and 4B presumably the scalar size in bytes - confirm against database_type and the callers.
+// This namespace defines only the three add_4B overloads above: there is no ('T','T') overload and no add_8B overload here.
+} // namespace geforce_gt_540m
+} // namespace fermi
+} // namespace nvidia
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp
new file mode 100644
index 0000000..31a329b
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp
@@ -0,0 +1,83 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GEFORCE_GTX_470_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GEFORCE_GTX_470_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace nvidia{
+namespace fermi{
+namespace geforce_gtx_470{ // matrix_product_template parameter sets registered under the device name "GeForce GTX 470" (nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi)
+/** @brief ('T','T') tag pair: adds the 8B parameter set for "GeForce GTX 470" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,2,32,32,4,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,2));
+}
+/** @brief ('T','N') tag pair: adds the 8B parameter set for "GeForce GTX 470" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,8,16,8,2,2,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,4));
+}
+/** @brief ('N','T') tag pair: adds the 8B parameter set for "GeForce GTX 470" to db via db.add_8B (both fetch policies are global, last two arguments are 0). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,128,32,1,2,1,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief ('N','N') tag pair: adds the 8B parameter set for "GeForce GTX 470" to db via db.add_8B (both fetch policies are global, last two arguments are 0). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,16,32,4,4,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief ('T','T') tag pair: adds the 4B parameter set for "GeForce GTX 470" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,2,16,64,8,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_CONTIGUOUS,16,8));
+}
+/** @brief ('T','N') tag pair: adds the 4B parameter set for "GeForce GTX 470" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,32,32,16,2,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,16));
+}
+/** @brief ('N','T') tag pair: adds the 4B parameter set for "GeForce GTX 470" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,8,16,32,8,2,2,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8));
+}
+/** @brief ('N','N') tag pair: adds the 4B parameter set for "GeForce GTX 470" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,16,32,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8));
+}
+// NOTE(review): the char_to_type tags presumably say whether each product operand is transposed ('T') or not ('N'), and 4B/8B presumably the scalar size in bytes - confirm against database_type and the callers.
+} // namespace geforce_gtx_470
+} // namespace fermi
+} // namespace nvidia
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp
new file mode 100644
index 0000000..7015ea5
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp
@@ -0,0 +1,84 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GTX580_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GTX580_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace nvidia{
+namespace fermi{
+namespace geforce_gtx_580{ // matrix_product_template parameter sets registered under the device name "GeForce GTX 580" (nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi)
+/** @brief ('T','T') tag pair: adds the 8B parameter set for "GeForce GTX 580" to db via db.add_8B (both fetch policies are global, last two arguments are 0). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,2,1,128,4,1,4,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_GLOBAL_STRIDED,0,0));
+}
+/** @brief ('T','N') tag pair: adds the 8B parameter set for "GeForce GTX 580" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,16,128,32,2,4,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,16));
+}
+/** @brief ('N','T') tag pair: adds the 8B parameter set for "GeForce GTX 580" to db via db.add_8B (both fetch policies are global, last two arguments are 0). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,4,64,128,4,1,2,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_STRIDED,0,0));
+}
+/** @brief ('N','N') tag pair: adds the 8B parameter set for "GeForce GTX 580" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,128,32,1,1,1,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,32,4));
+}
+/** @brief ('T','T') tag pair: adds the 4B parameter set for "GeForce GTX 580" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,2,32,32,8,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,2));
+}
+/** @brief ('T','N') tag pair: adds the 4B parameter set for "GeForce GTX 580" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,16,32,16,4,4,2,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8));
+}
+/** @brief ('N','T') tag pair: adds the 4B parameter set for "GeForce GTX 580" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(2,16,16,16,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16));
+}
+/** @brief ('N','N') tag pair: adds the 4B parameter set for "GeForce GTX 580" to db via db.add_4B (both fetch policies are global, last two arguments are 0). */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,128,16,2,4,1,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+// NOTE(review): the char_to_type tags presumably say whether each product operand is transposed ('T') or not ('N'), and 4B/8B presumably the scalar size in bytes - confirm against database_type and the callers.
+
+} // namespace geforce_gtx_580
+} // namespace fermi
+} // namespace nvidia
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp
new file mode 100644
index 0000000..f430d6c
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp
@@ -0,0 +1,84 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_TESLA_C2050_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_TESLA_C2050_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace nvidia{
+namespace fermi{
+namespace tesla_c2050{ // matrix_product_template parameter sets registered under the device name "Tesla C2050" (nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi)
+/** @brief ('T','T') tag pair: adds the 8B parameter set for "Tesla C2050" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,4,32,32,8,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,16,8));
+}
+/** @brief ('T','N') tag pair: adds the 8B parameter set for "Tesla C2050" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,32,32,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,16));
+}
+/** @brief ('N','T') tag pair: adds the 8B parameter set for "Tesla C2050" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,4,128,64,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_CONTIGUOUS,16,16));
+}
+/** @brief ('N','N') tag pair: adds the 8B parameter set for "Tesla C2050" to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,16,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16));
+}
+/** @brief ('T','T') tag pair: adds the 4B parameter set for "Tesla C2050" to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,32,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8));
+}
+/** @brief ('T','N') tag pair: adds the 4B parameter set for "Tesla C2050" to db via db.add_4B (same tuple as the ('T','T') overload). */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,32,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8));
+}
+/** @brief ('N','T') tag pair: adds the 4B parameter set for "Tesla C2050" to db via db.add_4B (third argument is 16 here, 32 in the other three 4B overloads). */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,16,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8));
+}
+/** @brief ('N','N') tag pair: adds the 4B parameter set for "Tesla C2050" to db via db.add_4B (same tuple as the ('T','T') overload). */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,32,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8));
+}
+// NOTE(review): the char_to_type tags presumably say whether each product operand is transposed ('T') or not ('N'), and 4B/8B presumably the scalar size in bytes - confirm against database_type and the callers.
+
+} // namespace tesla_c2050
+} // namespace fermi
+} // namespace nvidia
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp
new file mode 100644
index 0000000..73a62fc
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp
@@ -0,0 +1,84 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_KEPLER_K20M_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_KEPLER_K20M_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace nvidia{
+namespace kepler{
+namespace tesla_k20m{
+
+/** @brief Adds the Tesla K20m profile for transposition tags ('T','T') to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) {
+  matrix_product_template::parameters_type const tuned(1,2,8,32,8,2,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,4,16);
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", tuned);
+}
+/** @brief Adds the Tesla K20m profile for transposition tags ('T','N') to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) {
+  matrix_product_template::parameters_type const tuned(1,16,16,32,2,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,32);
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", tuned);
+}
+/** @brief Adds the Tesla K20m profile for transposition tags ('N','T') to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) {
+  matrix_product_template::parameters_type const tuned(1,2,8,64,16,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,4);
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", tuned);
+}
+/** @brief Adds the Tesla K20m profile for transposition tags ('N','N') to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) {
+  matrix_product_template::parameters_type const tuned(1,128,32,1,1,1,16,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_LOCAL,16,8);
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", tuned);
+}
+/** @brief Adds the Tesla K20m profile for transposition tags ('T','T') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) {
+  matrix_product_template::parameters_type const tuned(1,8,32,16,4,8,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,8,16);
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", tuned);
+}
+/** @brief Adds the Tesla K20m profile for transposition tags ('T','N') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) {
+  matrix_product_template::parameters_type const tuned(1,32,16,32,8,2,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,64);
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", tuned);
+}
+/** @brief Adds the Tesla K20m profile for transposition tags ('N','T') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) {
+  matrix_product_template::parameters_type const tuned(4,8,2,4,8,2,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0);
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", tuned);
+}
+/** @brief Adds the Tesla K20m profile for transposition tags ('N','N') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) {
+  matrix_product_template::parameters_type const tuned(1,128,64,1,4,2,16,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,16,8);
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", tuned);
+}
+
+} // namespace tesla_k20m
+} // namespace kepler
+} // namespace nvidia
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp
new file mode 100644
index 0000000..2c3f080
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp
@@ -0,0 +1,85 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_3_GEFORCE_GTX_750_TI_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_3_GEFORCE_GTX_750_TI_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace nvidia{
+namespace maxwell{
+namespace geforce_gtx_750_ti{
+
+/** @brief Adds the GeForce GTX 750 Ti profile for transposition tags ('T','T') to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) {
+  matrix_product_template::parameters_type const tuned(1,2,8,128,1,1,2,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_STRIDED,0,0);
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", tuned);
+}
+/** @brief Adds the GeForce GTX 750 Ti profile for transposition tags ('T','N') to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) {
+  matrix_product_template::parameters_type const tuned(1,8,32,32,2,1,2,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0);
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", tuned);
+}
+/** @brief Adds the GeForce GTX 750 Ti profile for transposition tags ('N','T') to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) {
+  matrix_product_template::parameters_type const tuned(1,16,8,32,1,8,2,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0);
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", tuned);
+}
+/** @brief Adds the GeForce GTX 750 Ti profile for transposition tags ('N','N') to db via db.add_8B. */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) {
+  matrix_product_template::parameters_type const tuned(1,16,8,32,1,2,2,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_STRIDED,0,0);
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", tuned);
+}
+/** @brief Adds the GeForce GTX 750 Ti profile for transposition tags ('T','T') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) {
+  matrix_product_template::parameters_type const tuned(1,4,32,16,8,2,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,16,4);
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", tuned);
+}
+/** @brief Adds the GeForce GTX 750 Ti profile for transposition tags ('T','N') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) {
+  matrix_product_template::parameters_type const tuned(1,16,16,16,4,2,8,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16);
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", tuned);
+}
+/** @brief Adds the GeForce GTX 750 Ti profile for transposition tags ('N','T') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) {
+  matrix_product_template::parameters_type const tuned(1,16,16,32,4,8,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,16);
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", tuned);
+}
+/** @brief Adds the GeForce GTX 750 Ti profile for transposition tags ('N','N') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) {
+  matrix_product_template::parameters_type const tuned(1,16,16,16,8,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16);
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", tuned);
+}
+
+} // namespace geforce_gtx_750_ti
+} // namespace maxwell
+} // namespace nvidia
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp
new file mode 100644
index 0000000..88dd596
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp
@@ -0,0 +1,84 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_TESLA_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_TESLA_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/templates/matrix_product_template.hpp"
+
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/builtin_database/common.hpp"
+
+namespace viennacl{
+namespace device_specific{
+namespace builtin_database{
+namespace devices{
+namespace gpu{
+namespace nvidia{
+namespace tesla{
+namespace geforce_gtx_260{
+
+/** @brief Adds the GeForce GTX 260 profile for tags ('T','T') via db.add_8B (an add_8B overload must not write through db.add_4B). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,32,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief Adds the GeForce GTX 260 profile for tags ('T','N') via db.add_8B (an add_8B overload must not write through db.add_4B). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,32,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief Adds the GeForce GTX 260 profile for tags ('N','T') via db.add_8B (an add_8B overload must not write through db.add_4B). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,32,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief Adds the GeForce GTX 260 profile for tags ('N','N') via db.add_8B (an add_8B overload must not write through db.add_4B). */
+inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,32,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief Adds the GeForce GTX 260 profile for transposition tags ('T','T') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,16,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief Adds the GeForce GTX 260 profile for transposition tags ('T','N') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,16,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief Adds the GeForce GTX 260 profile for transposition tags ('N','T') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,16,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+/** @brief Adds the GeForce GTX 260 profile for transposition tags ('N','N') to db via db.add_4B. */
+inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
+{
+  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,16,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0));
+}
+
+} // namespace geforce_gtx_260
+} // namespace tesla
+} // namespace nvidia
+} // namespace gpu
+} // namespace devices
+} // namespace builtin_database
+} // namespace device_specific
+} // namespace viennacl
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/matrix_product.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/matrix_product.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/matrix_product.hpp
new file mode 100644
index 0000000..4437956
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/matrix_product.hpp
@@ -0,0 +1,244 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_MATRIX_PRODUCT_HPP_
+#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_MATRIX_PRODUCT_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/amd/northern_islands/barts.hpp"
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp"
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp"
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp"
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/amd/northern_islands/scrapper.hpp"
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp"
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp"
+#include "viennacl/device_specific/builtin_database/devices/gpu/amd/northern_islands/devastator.hpp"
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp"
+#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp"
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp"
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/amd/evergreen/cypress.hpp"
+#include "viennacl/device_specific/builtin_database/devices/gpu/amd/evergreen/cedar.hpp"
+
+
+#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp"
+
+
+#include "viennacl/ocl/device_utils.hpp"
+#include "viennacl/scheduler/forwards.h"
+
+#include "viennacl/device_specific/builtin_database/devices/accelerator/fallback.hpp"
+#include "viennacl/device_specific/builtin_database/devices/cpu/fallback.hpp"
+#include "viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp"
+#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp"
+
+/** @file viennacl/device_specific/builtin_database/matrix_product.hpp
+*
+* Initializes the device database with the provided profiles. Updated semi-automatically.
+*/
+
+namespace viennacl
+{
+namespace device_specific
+{
+namespace builtin_database
+{
+
+/** @brief Builds the matrix-product profile database used for A_trans='N', B_trans='N'. */
+inline database_type<matrix_product_template::parameters_type> init_matrix_product_N_N()
+{
+  database_type<matrix_product_template::parameters_type> profiles;
+  char_to_type<'N'> const a_tag = char_to_type<'N'>();
+  char_to_type<'N'> const b_tag = char_to_type<'N'>();
+  // Generic fallbacks per device class (accelerator, CPU, GPU) are registered first.
+  devices::accelerator::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::accelerator::fallback::add_8B(profiles, a_tag, b_tag);
+  devices::cpu::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::cpu::fallback::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::fallback::add_8B(profiles, a_tag, b_tag);
+  // Device-specific profiles follow; the registration order is kept as-is.
+  devices::gpu::amd::evergreen::cedar::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::evergreen::cypress::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::volcanic_islands::hawaii::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::volcanic_islands::hawaii::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_580::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_580::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::kepler::tesla_k20m::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::kepler::tesla_k20m::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::southern_islands::tahiti::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::southern_islands::tahiti::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::devastator::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::tesla::geforce_gtx_260::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::scrapper::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_470::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::tesla_c2050::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::tesla_c2050::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::barts::add_4B(profiles, a_tag, b_tag);
+  return profiles;
+}
+
+/** @brief Builds the matrix-product profile database used for A_trans='T', B_trans='N'. */
+inline database_type<matrix_product_template::parameters_type> init_matrix_product_T_N()
+{
+  database_type<matrix_product_template::parameters_type> profiles;
+  char_to_type<'T'> const a_tag = char_to_type<'T'>();
+  char_to_type<'N'> const b_tag = char_to_type<'N'>();
+  devices::accelerator::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::accelerator::fallback::add_8B(profiles, a_tag, b_tag);
+  devices::cpu::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::cpu::fallback::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::fallback::add_8B(profiles, a_tag, b_tag);
+  // Generic fallbacks are above; device-specific profiles follow in their original order.
+  devices::gpu::nvidia::fermi::geforce_gt_540m::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::evergreen::cedar::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::evergreen::cypress::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::volcanic_islands::hawaii::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::volcanic_islands::hawaii::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_580::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_580::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::kepler::tesla_k20m::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::kepler::tesla_k20m::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::southern_islands::tahiti::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::southern_islands::tahiti::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::devastator::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::tesla::geforce_gtx_260::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::scrapper::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_470::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::tesla_c2050::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::tesla_c2050::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::barts::add_4B(profiles, a_tag, b_tag);
+  return profiles;
+}
+
+/** @brief Builds the matrix-product profile database used for A_trans='N', B_trans='T'. */
+inline database_type<matrix_product_template::parameters_type> init_matrix_product_N_T()
+{
+  database_type<matrix_product_template::parameters_type> profiles;
+  char_to_type<'N'> const a_tag = char_to_type<'N'>();
+  char_to_type<'T'> const b_tag = char_to_type<'T'>();
+
+  // Generic fallbacks per device class (accelerator, CPU, GPU) are registered first.
+  devices::accelerator::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::accelerator::fallback::add_8B(profiles, a_tag, b_tag);
+  devices::cpu::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::cpu::fallback::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::fallback::add_8B(profiles, a_tag, b_tag);
+  // Device-specific profiles follow; the registration order is kept as-is.
+  devices::gpu::nvidia::fermi::geforce_gt_540m::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::evergreen::cedar::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::evergreen::cypress::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::volcanic_islands::hawaii::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::volcanic_islands::hawaii::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_580::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_580::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::kepler::tesla_k20m::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::kepler::tesla_k20m::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::southern_islands::tahiti::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::southern_islands::tahiti::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::devastator::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::tesla::geforce_gtx_260::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::scrapper::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_470::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::tesla_c2050::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::tesla_c2050::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::barts::add_4B(profiles, a_tag, b_tag);
+
+  return profiles;
+}
+
+/** @brief Builds the matrix-product profile database used for A_trans='T', B_trans='T'. */
+inline database_type<matrix_product_template::parameters_type> init_matrix_product_T_T()
+{
+  database_type<matrix_product_template::parameters_type> profiles;
+  char_to_type<'T'> const a_tag = char_to_type<'T'>();
+  char_to_type<'T'> const b_tag = char_to_type<'T'>();
+  devices::accelerator::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::accelerator::fallback::add_8B(profiles, a_tag, b_tag);
+  devices::cpu::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::cpu::fallback::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::fallback::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::fallback::add_8B(profiles, a_tag, b_tag);
+  // Generic fallbacks are above; device-specific profiles follow in their original order.
+  devices::gpu::amd::evergreen::cedar::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::evergreen::cypress::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::volcanic_islands::hawaii::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::volcanic_islands::hawaii::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_580::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_580::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::kepler::tesla_k20m::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::kepler::tesla_k20m::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::southern_islands::tahiti::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::southern_islands::tahiti::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::devastator::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::tesla::geforce_gtx_260::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::scrapper::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::geforce_gtx_470::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::tesla_c2050::add_4B(profiles, a_tag, b_tag);
+  devices::gpu::nvidia::fermi::tesla_c2050::add_8B(profiles, a_tag, b_tag);
+  devices::gpu::amd::northern_islands::barts::add_4B(profiles, a_tag, b_tag);
+  return profiles;
+}
+
+/** One profile database per (A_trans, B_trans) pair, filled by the matching init function; 'static' gives each including translation unit its own copy. */
+static database_type<matrix_product_template::parameters_type> matrix_product_N_N(init_matrix_product_N_N());
+static database_type<matrix_product_template::parameters_type> matrix_product_T_N(init_matrix_product_T_N());
+static database_type<matrix_product_template::parameters_type> matrix_product_N_T(init_matrix_product_N_T());
+static database_type<matrix_product_template::parameters_type> matrix_product_T_T(init_matrix_product_T_T());
+/** @brief Picks the profile database matching (A_trans, B_trans) and returns get_parameters<NumericT>() for the given device.
+ *  Both flags are expected to be 'N' or 'T'; this is only checked by assert(). */
+template<class NumericT>
+matrix_product_template::parameters_type const & matrix_product_params(ocl::device const & device, char A_trans, char B_trans)
+{
+  assert(A_trans=='N' || A_trans=='T');
+  assert(B_trans=='N' || B_trans=='T');
+  if (A_trans=='N' && B_trans=='N')
+    return get_parameters<NumericT>(matrix_product_N_N, device);
+  if (A_trans=='T' && B_trans=='N')
+    return get_parameters<NumericT>(matrix_product_T_N, device);
+  if (A_trans=='N' && B_trans=='T')
+    return get_parameters<NumericT>(matrix_product_N_T, device);
+  // Every remaining combination (including unchecked values when asserts are disabled) uses the T_T database.
+  return get_parameters<NumericT>(matrix_product_T_T, device);
+}
+
+
+}
+}
+}
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/execute.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/execute.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/execute.hpp
new file mode 100644
index 0000000..2f4960a
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/execute.hpp
@@ -0,0 +1,55 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_EXECUTE_HPP
+#define VIENNACL_DEVICE_SPECIFIC_EXECUTE_HPP
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+
+/** @file viennacl/device_specific/execute.hpp
+    @brief the user interface for the code generator
+*/
+
+#include <cstring>
+#include <vector>
+#include <typeinfo>
+
+#include "viennacl/scheduler/forwards.h"
+#include "viennacl/device_specific/forwards.h"
+#include "viennacl/device_specific/templates/template_base.hpp"
+#include "viennacl/device_specific/tree_parsing.hpp"
+#include "viennacl/device_specific/execution_handler.hpp"
+
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/tools/timer.hpp"
+
+namespace viennacl
+{
+namespace device_specific
+{
+
+/** @brief Names the program after the statements' representation, registers template T under that name in a local execution_handler for ctx's current device, and executes it. */
+inline void execute(template_base const & T, statements_container const & statements, viennacl::ocl::context & ctx = viennacl::ocl::current_context(), bool force_compilation = false)
+{
+  std::string kernel_key = tree_parsing::statements_representation(statements, BIND_TO_HANDLE);
+  execution_handler runner(kernel_key, ctx, ctx.current_device(), force_compilation);
+  runner.add(kernel_key, T, statements);
+  runner.execute(kernel_key, statements);
+}
+
+}
+}
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/execution_handler.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/execution_handler.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/execution_handler.hpp
new file mode 100644
index 0000000..8f725fd
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/execution_handler.hpp
@@ -0,0 +1,102 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_EXECUTION_HANDLER_HPP
+#define VIENNACL_DEVICE_SPECIFIC_EXECUTION_HANDLER_HPP
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+
+/** @file viennacl/device_specific/execution_handler.hpp
+    @brief Helper for handling fallbacks, lazy compilation, input-dependent kernels, etc
+*/
+
+#include <map>
+
+#include "viennacl/tools/shared_ptr.hpp"
+
+#include "viennacl/device_specific/lazy_program_compiler.hpp"
+#include "viennacl/device_specific/templates/template_base.hpp"
+#include "viennacl/device_specific/utils.hpp"
+
+namespace viennacl
+{
+namespace device_specific
+{
+
+/** @brief Keeps the generated templates and the lazily compiled programs of one execution.
+ *
+ *  Two lazy_program_compiler objects named "<base>_0" and "<base>_1" are created on construction.
+ *  Each registered template may contribute up to two sources; the i-th source is appended to the i-th program.
+ *
+ *  The context and the device are stored by reference and are not owned by the handler.
+ */
+class execution_handler
+{
+public:
+  typedef std::map< std::string, tools::shared_ptr<template_base> > container_type;
+
+private:
+  /** @brief Returns the kernel name prefix for a key: an underscore followed by the key. */
+  static std::string append_prefix(std::string const & str)
+  {
+    return "_" + str;
+  }
+
+  /** @brief Returns the pragma enabling the OpenCL extension 'ext', or a bare newline if 'ext' has fewer than two characters. */
+  static std::string define_extension(std::string const & ext)
+  {
+    // Note: On devices without double precision support, 'ext' is an empty string.
+    if (ext.length() > 1)
+      return "#pragma OPENCL EXTENSION " + ext + " : enable\n";
+    return "\n";
+  }
+
+  /** @brief Appends a lazy program called 'name' whose source starts with the device's double-precision extension pragma. */
+  void init_program_compiler(std::string const & name, bool force_recompilation)
+  {
+    lazy_programs_.push_back(lazy_program_compiler(&ctx_, name, force_recompilation));
+    lazy_programs_.back().add(define_extension(device_.double_support_extension()));
+  }
+
+public:
+  /** @brief Sets up the two lazy programs "<program_name_base>_0" and "<program_name_base>_1".
+  *
+  * @param program_name_base   Common prefix of the two program names
+  * @param ctx                 Context handed to the lazy program compilers
+  * @param device              Device queried for its double-precision extension and passed to the template generators
+  * @param force_recompilation Forwarded to each lazy program compiler
+  */
+  execution_handler(std::string const & program_name_base, viennacl::ocl::context & ctx, viennacl::ocl::device const & device, bool force_recompilation = false) : ctx_(ctx), device_(device)
+  {
+    lazy_programs_.reserve(2);
+    init_program_compiler(program_name_base + "_0", force_recompilation);
+    init_program_compiler(program_name_base + "_1", force_recompilation);
+  }
+
+  /** @brief Registers a clone of template 'T' under 'key' and appends its generated sources to the lazy programs.
+  *
+  *   Nothing is generated if 'key' is already registered.
+  */
+  void add(std::string const & key, template_base const & T, statements_container const & statements)
+  {
+    if (kernels_.insert(container_type::value_type(key, T.clone())).second)
+    {
+      std::vector<std::string> sources = at(kernels_, key)->generate(append_prefix(key), statements, device_);
+      // Only two lazy programs exist, hence a template must not emit more than two sources.
+      assert(sources.size()<=2);
+      for (unsigned int i = 0; i < sources.size(); ++i)
+        lazy_programs_[i].add(sources[i]);
+    }
+  }
+
+  /** @brief Returns the template registered under 'key'; at() throws std::out_of_range for an unknown key. */
+  template_base * template_of(std::string const & key)
+  {
+    return at(kernels_, key).get();
+  }
+
+  /** @brief Enqueues the template registered under 'key' for the given statements; at() throws std::out_of_range for an unknown key. */
+  void execute(container_type::key_type const & key, statements_container const & statements)
+  {
+    tools::shared_ptr<template_base> & template_pointer = at(kernels_, key);
+    template_pointer->enqueue(append_prefix(key), lazy_programs_, statements);
+  }
+
+private:
+  viennacl::ocl::context & ctx_;
+  viennacl::ocl::device const & device_;
+  container_type kernels_;
+  std::vector<lazy_program_compiler> lazy_programs_;
+};
+
+}
+}
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/forwards.h
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/forwards.h b/native-viennaCL/src/main/cpp/viennacl/device_specific/forwards.h
new file mode 100644
index 0000000..590ed1f
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/forwards.h
@@ -0,0 +1,294 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_FORWARDS_H
+#define VIENNACL_DEVICE_SPECIFIC_FORWARDS_H
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+
+/** @file viennacl/device_specific/forwards.h
+    @brief Forward declarations and common helper types for the device-specific kernel generator
+*/
+
+#include <list>
+#include <map>
+#include <set>
+#include <stdexcept>
+
+#include "viennacl/scheduler/io.hpp"
+
+#include "viennacl/ocl/forwards.h"
+#include "viennacl/tools/shared_ptr.hpp"
+#include "viennacl/scheduler/forwards.h"
+
+#include "viennacl/backend/mem_handle.hpp"
+
+namespace viennacl
+{
+namespace device_specific
+{
+
+//Error codes: TEMPLATE_VALID is 0, every other code is a distinct negative value (note that -8 is not assigned)
+static const int TEMPLATE_VALID = 0;
+static const int TEMPLATE_LOCAL_MEMORY_OVERFLOW = -1;
+static const int TEMPLATE_WORK_GROUP_SIZE_OVERFLOW = -2;
+static const int TEMPLATE_LOCAL_SIZE_0_OVERFLOW = -3;
+static const int TEMPLATE_LOCAL_SIZE_1_OVERFLOW = -4;
+static const int TEMPLATE_LOCAL_SIZE_2_OVERFLOW = -5;
+static const int TEMPLATE_LOCAL_SIZE_NOT_WARP_MULTIPLE = -6;
+static const int TEMPLATE_INVALID_SIMD_WIDTH = -7;
+static const int TEMPLATE_INVALID_FETCHING_POLICY_TYPE= -9;
+
+static const int TEMPLATE_GLOBAL_MEMORY_REQUIRES_ZERO_LOCAL_FETCH = -10;
+static const int TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE = -11;
+static const int TEMPLATE_KS_MUST_BE_SMALLER_THAN_KL = -12;
+static const int TEMPLATE_SIMD_WIDTH_MUST_BE_ONE = -13;
+static const int TEMPLATE_LOCAL_FETCH_PRODUCT_MUST_MATCH_LOCAL_SIZE_PRODUCT = -14;
+static const int TEMPLATE_LOCAL_FETCH_0_MUST_BE_KL_MULTIPLE = -15;
+static const int TEMPLATE_LOCAL_FETCH_0_MUST_BE_NL_MULTIPLE = -16;
+static const int TEMPLATE_LOCAL_FETCH_1_MUST_BE_KL_MULTIPLE = -17;
+static const int TEMPLATE_LOCAL_FETCH_1_MUST_BE_ML_MULTIPLE = -18;
+
+/** @brief Plain aggregate of four strings: i with bound0 and j with bound1.
+ *
+ *  NOTE(review): presumably index names and the names of their bounds - confirm against the users of this struct.
+ */
+struct index_tuple
+{
+  /** @brief Two-string form: j and bound1 are left empty. */
+  index_tuple(std::string const & _i, std::string const & _bound0)
+    : i(_i), bound0(_bound0), j(), bound1()
+  { }
+
+  /** @brief Four-string form: every member is taken from the respective argument. */
+  index_tuple(std::string const & _i, std::string const & _bound0,
+              std::string const & _j, std::string const & _bound1)
+    : i(_i), bound0(_bound0), j(_j), bound1(_bound1)
+  { }
+
+  std::string i;
+  std::string bound0;
+  std::string j;
+  std::string bound1;
+};
+
+/** @brief True if the node's operation is an inner product or belongs to the vector-reduction operation family. */
+inline bool is_scalar_reduction(scheduler::statement_node const & node)
+{
+  if (node.op.type == scheduler::OPERATION_BINARY_INNER_PROD_TYPE)
+    return true;
+
+  return node.op.type_family == scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY;
+}
+
+/** @brief True if the node's operation is a matrix-vector product or belongs to the rows- or columns-reduction operation family. */
+inline bool is_vector_reduction(scheduler::statement_node const & node)
+{
+  if (node.op.type == scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE)
+    return true;
+
+  if (node.op.type_family == scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY)
+    return true;
+
+  return node.op.type_family == scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY;
+}
+
+/** @brief Follows the left-hand-side links starting at 'root' and returns the first node whose lhs is not a composite operation.
+*
+* @param array  Node array of a statement
+* @param root   Index of the node to start from
+*/
+inline scheduler::statement_node const & lhs_most(scheduler::statement::container_type const & array, vcl_size_t root)
+{
+  vcl_size_t idx = root;
+  while (array[idx].lhs.type_family == scheduler::COMPOSITE_OPERATION_FAMILY)
+    idx = array[idx].lhs.node_index;
+
+  return array[idx];
+}
+
+enum expression_type // kinds of expressions; expression_type_to_string() provides a printable label for each value
+{
+  SCALAR_AXPY_TYPE,
+  VECTOR_AXPY_TYPE,
+  MATRIX_AXPY_TYPE,
+  REDUCTION_TYPE,
+  ROW_WISE_REDUCTION_Nx_TYPE,
+  ROW_WISE_REDUCTION_Tx_TYPE,
+  MATRIX_PRODUCT_NN_TYPE,
+  MATRIX_PRODUCT_TN_TYPE,
+  MATRIX_PRODUCT_NT_TYPE,
+  MATRIX_PRODUCT_TT_TYPE,
+  INVALID_EXPRESSION_TYPE // has no dedicated label; printed as "INVALID EXPRESSION"
+};
+
+/** @brief Returns a printable label (a string literal) for an expression type.
+ *
+ *  Values without a dedicated label, INVALID_EXPRESSION_TYPE included, yield "INVALID EXPRESSION".
+ */
+inline const char * expression_type_to_string(expression_type type)
+{
+  const char * label = "INVALID EXPRESSION";
+
+  switch (type)
+  {
+  case SCALAR_AXPY_TYPE:           label = "Scalar AXPY";                break;
+  case VECTOR_AXPY_TYPE:           label = "Vector AXPY";                break;
+  case MATRIX_AXPY_TYPE:           label = "Matrix AXPY";                break;
+  case REDUCTION_TYPE:             label = "Reduction";                  break;
+  case ROW_WISE_REDUCTION_Nx_TYPE: label = "Row-wise reduction: Ax";     break;
+  case ROW_WISE_REDUCTION_Tx_TYPE: label = "Row-wise reduction : Tx";    break;
+  case MATRIX_PRODUCT_NN_TYPE:     label = "Matrix-Matrix Product : AA"; break;
+  case MATRIX_PRODUCT_TN_TYPE:     label = "Matrix-Matrix Product : TA"; break;
+  case MATRIX_PRODUCT_NT_TYPE:     label = "Matrix-Matrix Product : AT"; break;
+  case MATRIX_PRODUCT_TT_TYPE:     label = "Matrix-Matrix Product : TT"; break;
+  default:                         break;
+  }
+
+  return label;
+}
+
+/** @brief Generates the string for a by-value kernel argument: "<scalartype> <name>,"
+*
+* @param scalartype  Type of the argument as it should appear in the kernel signature
+* @param name        Name of the argument
+* @return The argument declaration followed by a trailing comma
+*/
+inline std::string generate_value_kernel_argument(std::string const & scalartype, std::string const & name)
+{
+  return scalartype + ' ' + name + ",";
+}
+
+/** @brief Generates the string for a pointer kernel argument: "<address_space> <scalartype>* <name>,"
+*
+* @param address_space  Address space qualifier placed in front of the type
+* @param scalartype     Pointee type as it should appear in the kernel signature
+* @param name           Name of the argument
+* @return The argument declaration followed by a trailing comma
+*/
+inline std::string generate_pointer_kernel_argument(std::string const & address_space, std::string const & scalartype, std::string const & name)
+{
+  return address_space +  " " + scalartype + "* " + name + ",";
+}
+
+/** @brief Emulation of C++11's .at() member for std::map<>, const-version
+*
+* @return Reference to the value stored under 'key'
+* @throw std::out_of_range if 'key' is not contained in 'map'
+*/
+template<typename KeyT, typename ValueT>
+ValueT const & at(std::map<KeyT, ValueT> const & map, KeyT const & key)
+{
+  typedef typename std::map<KeyT, ValueT>::const_iterator const_iterator_type;
+
+  const_iterator_type const pos = map.find(key);
+  if (pos == map.end())
+    throw std::out_of_range("Generator: Key not found in map");
+
+  return pos->second;
+}
+
+/** @brief Emulation of C++11's .at() member for std::map<>, non-const version
+*
+* @return Mutable reference to the value stored under 'key'
+* @throw std::out_of_range if 'key' is not contained in 'map'
+*/
+template<typename KeyT, typename ValueT>
+ValueT & at(std::map<KeyT, ValueT> & map, KeyT const & key)
+{
+  typedef typename std::map<KeyT, ValueT>::iterator iterator_type;
+
+  iterator_type const pos = map.find(key);
+  if (pos == map.end())
+    throw std::out_of_range("Generator: Key not found in map");
+
+  return pos->second;
+}
+
+/** @brief Exception for the case the generator is unable to deal with the operation */
+class generator_not_supported_exception : public std::exception
+{
+public:
+  /** @brief Creates an exception whose what() is the empty string. */
+  generator_not_supported_exception() {}
+
+  /** @brief Creates an exception whose what() is a fixed ViennaCL prefix followed by 'message'. */
+  generator_not_supported_exception(std::string message)
+    : message_("ViennaCL: Internal error: The generator cannot handle the statement provided: ")
+  {
+    message_ += message;
+  }
+
+  virtual ~generator_not_supported_exception() throw() {}
+
+  /** @brief Returns the stored message as a C string. */
+  virtual const char* what() const throw()
+  {
+    return message_.c_str();
+  }
+
+private:
+  std::string message_;
+};
+
+namespace utils
+{
+  class kernel_generation_stream; // forward declaration only; the class is not defined in this header
+}
+
+
+enum leaf_t // second component of mapping_key (see the typedef further down)
+{
+  LHS_NODE_TYPE,
+  PARENT_NODE_TYPE,
+  RHS_NODE_TYPE
+};
+
+class mapped_object; // forward declaration only
+class template_base; // forward declaration only
+
+typedef std::pair<vcl_size_t, leaf_t> mapping_key; // (index, leaf) pair used as the key of mapping_type
+typedef std::map<mapping_key, tools::shared_ptr<mapped_object> > mapping_type; // associates each mapping_key with a shared mapped_object
+
+
+namespace tree_parsing // declarations only: none of these functions is defined in this header
+{
+
+  template<class Fun>
+  inline void traverse(scheduler::statement const & statement, vcl_size_t root_idx, Fun const & fun, bool inspect);
+
+  inline void process(utils::kernel_generation_stream & stream, leaf_t leaf, std::string const & type_key, std::string const & to_process,
+                      scheduler::statement const & statement, vcl_size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed);
+  inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors, scheduler::statement const & statement, vcl_size_t root_idx,mapping_type const & mapping);
+}
+
+using scheduler::INT_TYPE; // the following using-declarations make selected scheduler names usable unqualified in device_specific
+using scheduler::UINT_TYPE;
+using scheduler::ULONG_TYPE;
+using scheduler::LONG_TYPE;
+using scheduler::FLOAT_TYPE;
+using scheduler::DOUBLE_TYPE;
+
+typedef cl_uint vendor_id_type; // alias of the OpenCL type cl_uint
+typedef cl_device_type device_type; // alias of the OpenCL type cl_device_type
+typedef std::string device_name_type; // device names are plain strings
+
+class symbolic_binder // abstract interface (bind and get are pure virtual); bind_to_handle and bind_all_unique derive from it
+{
+public:
+  virtual ~symbolic_binder(){ }
+  virtual bool bind(viennacl::backend::mem_handle const * ph) = 0;
+  virtual unsigned int get(viennacl::backend::mem_handle const * ph) = 0;
+};
+
+/** @brief Binder that associates one index with each distinct memory handle.
+ *
+ *  get() returns a fresh index for a NULL handle or for a handle not recorded before;
+ *  for an already recorded handle it returns the index stored when the handle was first bound.
+ */
+class bind_to_handle : public symbolic_binder
+{
+public:
+  bind_to_handle() : current_arg_(0){ }
+
+  /** @brief Records 'ph' together with the current index.
+  *
+  * @return true for a NULL handle or a handle that was not recorded yet, false otherwise
+  */
+  bool bind(viennacl::backend::mem_handle const * ph)
+  {
+    if (ph == NULL)
+      return true;
+
+    return memory.insert(std::make_pair((void*)ph, current_arg_)).second;
+  }
+
+  /** @brief Returns the index for 'ph'; the internal counter advances only when bind() reports true. */
+  unsigned int get(viennacl::backend::mem_handle const * ph)
+  {
+    if (bind(ph))
+      return current_arg_++;
+
+    return at(memory, (void*)ph);
+  }
+
+private:
+  unsigned int current_arg_;
+  std::map<void*,unsigned int> memory;
+};
+
+/** @brief Binder that ignores the handle: bind() always succeeds and every get() returns the next index. */
+class bind_all_unique : public symbolic_binder
+{
+public:
+  bind_all_unique() : current_arg_(0){ }
+
+  /** @brief Always returns true; the handle is not inspected. */
+  bool bind(viennacl::backend::mem_handle const *) {return true; }
+
+  /** @brief Returns the current counter value and increments the counter; the handle is not inspected. */
+  unsigned int get(viennacl::backend::mem_handle const *){ return current_arg_++; }
+private:
+  unsigned int current_arg_;
+};
+
+enum binding_policy_t{ // selects the binder created by make_binder()
+  BIND_ALL_UNIQUE, // make_binder() returns a bind_all_unique
+  BIND_TO_HANDLE // make_binder() returns a bind_to_handle
+};
+
+/** @brief Creates the binder for a binding policy.
+*
+* @param policy  BIND_TO_HANDLE yields a bind_to_handle, any other value a bind_all_unique
+* @return Shared pointer owning the newly created binder
+*/
+inline tools::shared_ptr<symbolic_binder> make_binder(binding_policy_t policy)
+{
+  if (policy != BIND_TO_HANDLE)
+    return tools::shared_ptr<symbolic_binder>(new bind_all_unique());
+
+  return tools::shared_ptr<symbolic_binder>(new bind_to_handle());
+}
+
+template<char C>
+struct char_to_type{ }; // empty struct: every value of C names a distinct type
+
+/** @brief List of scheduler statements together with an order tag (SEQUENTIAL or INDEPENDENT). */
+class statements_container
+{
+public:
+  typedef std::list<scheduler::statement> data_type;
+  enum order_type { SEQUENTIAL, INDEPENDENT };
+
+  /** @brief Copies an existing list of statements and stores the given order. */
+  statements_container(data_type const & data, order_type order)
+    : data_(data), order_(order)
+  { }
+
+  /** @brief Holds a copy of the single statement 's0'; the order is INDEPENDENT. Not explicit, so a statement converts implicitly. */
+  statements_container(scheduler::statement const & s0)
+    : data_(1, s0), order_(INDEPENDENT)
+  { }
+
+  /** @brief Holds copies of 's0' followed by 's1' with the given order. */
+  statements_container(scheduler::statement const & s0, scheduler::statement const & s1, order_type order)
+    : order_(order)
+  {
+    data_.push_back(s0);
+    data_.push_back(s1);
+  }
+
+  /** @brief Read-only access to the stored statements. */
+  data_type const & data() const
+  {
+    return data_;
+  }
+
+  /** @brief Returns the order tag given at construction. */
+  order_type order() const
+  {
+    return order_;
+  }
+
+private:
+  data_type data_;
+  order_type order_;
+};
+
+}
+
+}
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/lazy_program_compiler.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/lazy_program_compiler.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/lazy_program_compiler.hpp
new file mode 100644
index 0000000..3e75b9b
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/lazy_program_compiler.hpp
@@ -0,0 +1,74 @@
+#ifndef VIENNACL_DEVICE_SPECIFIC_LAZY_PROGRAM_COMPILER_HPP
+#define VIENNACL_DEVICE_SPECIFIC_LAZY_PROGRAM_COMPILER_HPP
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+
+/** @file viennacl/device_specific/lazy_program_compiler.hpp
+    @brief Helper for compiling a program lazily
+*/
+
+#include <map>
+
+#include "viennacl/ocl/context.hpp"
+
+namespace viennacl
+{
+
+namespace device_specific
+{
+
+  /** @brief Collects OpenCL source code and builds the program only when it is requested.
+   *
+   *  The context is stored as a raw pointer and is not owned by this class.
+   */
+  class lazy_program_compiler
+  {
+  public:
+
+    /** @brief Starts with the source 'src' for the program called 'name'. */
+    lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, std::string const & src, bool force_recompilation) : ctx_(ctx), name_(name), src_(src), force_recompilation_(force_recompilation){ }
+
+    /** @brief Starts with an empty source for the program called 'name'. */
+    lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, bool force_recompilation) : ctx_(ctx), name_(name), force_recompilation_(force_recompilation){ }
+
+    /** @brief Appends 'src' to the source collected so far. */
+    void add(std::string const & src) {  src_+=src; }
+
+    /** @brief Returns the source collected so far. */
+    std::string const & src() const { return src_; }
+
+    /** @brief Returns the program registered under the stored name, adding it to the context first if necessary.
+    *
+    *   If force_recompilation was set, a program of that name already present in the context
+    *   is deleted and added again on every call.
+    */
+    viennacl::ocl::program & program()
+    {
+      if (force_recompilation_ && ctx_->has_program(name_))
+        ctx_->delete_program(name_);
+      if (!ctx_->has_program(name_))
+      {
+#ifdef VIENNACL_BUILD_INFO
+          std::cerr << "Creating program " << name_ << std::endl;
+#endif
+          ctx_->add_program(src_, name_);
+#ifdef VIENNACL_BUILD_INFO
+          std::cerr << "Done creating program " << name_ << std::endl;
+#endif
+      }
+      return ctx_->get_program(name_);
+    }
+
+  private:
+    viennacl::ocl::context * ctx_;
+    std::string name_;
+    std::string src_;
+    bool force_recompilation_;
+  };
+
+}
+
+}
+#endif