You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2016/06/08 21:40:45 UTC

[48/51] [partial] mahout git commit: (nojira) add native-viennaCL module to codebase. closes apache/mahout#241

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas2_opencl.cu
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas2_opencl.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas2_opencl.cu
new file mode 100644
index 0000000..20c4994
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas2_opencl.cu
@@ -0,0 +1,219 @@
+/* =========================================================================
+   Copyright (c) 2010-2014, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the PDF manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+// include necessary system headers
+#include <iostream>
+
+#include "viennacl.hpp"
+#include "viennacl_private.hpp"
+
+//include basic scalar and vector types of ViennaCL
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/prod.hpp"
+
+
+// xGEMV
+
+/** @brief Single-precision GEMV on OpenCL buffers: y <- alpha * op(A) * x + beta * y
+ *
+ * A is wrapped as an m-by-n matrix. op(A) is the transpose of A if transA == ViennaCLTrans and A itself otherwise.
+ * x is wrapped with as many entries as op(A) has columns, y with as many entries as op(A) has rows.
+ *
+ * @param backend   Backend object; backend->opencl_backend.context_id selects the OpenCL context for all wrappers
+ * @param order     Compared against ViennaCLRowMajor to set the storage-layout flag of the matrix wrapper
+ * @param transA    ViennaCLTrans multiplies with the transpose of A; any other value multiplies with A as-is
+ * @param m         Number of rows of A
+ * @param n         Number of columns of A
+ * @param alpha     Scaling factor applied to the matrix-vector product
+ * @param A         OpenCL buffer holding the matrix
+ * @param offA_row  Start offset of A in the first (row) dimension
+ * @param offA_col  Start offset of A in the second (column) dimension
+ * @param incA_row  Stride of A in the first (row) dimension
+ * @param incA_col  Stride of A in the second (column) dimension
+ * @param lda       Forwarded as the internal size of the second dimension of the matrix wrapper
+ * @param x         OpenCL buffer holding the input vector
+ * @param offx      Start offset of x
+ * @param incx      Stride of x
+ * @param beta      Scaling factor applied to y before the product is added
+ * @param y         OpenCL buffer holding the vector that is updated
+ * @param offy      Start offset of y
+ * @param incy      Stride of y
+ * @return ViennaCLSuccess (the function has no other return path)
+ */
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemv(ViennaCLBackend backend,
+                                                              ViennaCLOrder order, ViennaCLTranspose transA,
+                                                              ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                              cl_mem x, ViennaCLInt offx, ViennaCLInt incx,
+                                                              float beta,
+                                                              cl_mem y, ViennaCLInt offy, ViennaCLInt incy)
+{
+  typedef viennacl::vector_base<float>::size_type           size_type;
+  typedef viennacl::vector_base<float>::size_type           difference_type;
+
+  // op(A) is n-by-m when transposed, so x and y swap lengths relative to the non-transposed case.
+  // Wrapping them with fixed lengths (n, m) made the transposed product dimensionally inconsistent for m != n.
+  const bool      transposed = (transA == ViennaCLTrans);
+  const size_type x_length   = size_type(transposed ? m : n);
+  const size_type y_length   = size_type(transposed ? n : m);
+
+  viennacl::vector_base<float> v1(x, x_length, size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  viennacl::vector_base<float> v2(y, y_length, size_type(offy), difference_type(incy), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  // NOTE(review): the internal sizes are (m, lda) for both storage orders - confirm this is intended for column-major input with lda != m.
+  viennacl::matrix_base<float> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                                   size_type(m), size_type(offA_row), difference_type(incA_row), size_type(m),
+                                   size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == ViennaCLRowMajor);
+
+  // Scale y first, then accumulate the scaled product into it.
+  v2 *= beta;
+  if (transposed)
+    v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1);
+  else
+    v2 += alpha * viennacl::linalg::prod(mat, v1);
+
+  return ViennaCLSuccess;
+}
+
+/** @brief Double-precision GEMV on OpenCL buffers: y <- alpha * op(A) * x + beta * y
+ *
+ * A is wrapped as an m-by-n matrix. op(A) is the transpose of A if transA == ViennaCLTrans and A itself otherwise.
+ * x is wrapped with as many entries as op(A) has columns, y with as many entries as op(A) has rows.
+ *
+ * @param backend   Backend object; backend->opencl_backend.context_id selects the OpenCL context for all wrappers
+ * @param order     Compared against ViennaCLRowMajor to set the storage-layout flag of the matrix wrapper
+ * @param transA    ViennaCLTrans multiplies with the transpose of A; any other value multiplies with A as-is
+ * @param m         Number of rows of A
+ * @param n         Number of columns of A
+ * @param alpha     Scaling factor applied to the matrix-vector product
+ * @param A         OpenCL buffer holding the matrix
+ * @param offA_row  Start offset of A in the first (row) dimension
+ * @param offA_col  Start offset of A in the second (column) dimension
+ * @param incA_row  Stride of A in the first (row) dimension
+ * @param incA_col  Stride of A in the second (column) dimension
+ * @param lda       Forwarded as the internal size of the second dimension of the matrix wrapper
+ * @param x         OpenCL buffer holding the input vector
+ * @param offx      Start offset of x
+ * @param incx      Stride of x
+ * @param beta      Scaling factor applied to y before the product is added
+ * @param y         OpenCL buffer holding the vector that is updated
+ * @param offy      Start offset of y
+ * @param incy      Stride of y
+ * @return ViennaCLSuccess (the function has no other return path)
+ */
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemv(ViennaCLBackend backend,
+                                                              ViennaCLOrder order, ViennaCLTranspose transA,
+                                                              ViennaCLInt m, ViennaCLInt n, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                              cl_mem x, ViennaCLInt offx, ViennaCLInt incx,
+                                                              double beta,
+                                                              cl_mem y, ViennaCLInt offy, ViennaCLInt incy)
+{
+  typedef viennacl::vector_base<double>::size_type           size_type;
+  typedef viennacl::vector_base<double>::size_type           difference_type;
+
+  // op(A) is n-by-m when transposed, so x and y swap lengths relative to the non-transposed case.
+  // Wrapping them with fixed lengths (n, m) made the transposed product dimensionally inconsistent for m != n.
+  const bool      transposed = (transA == ViennaCLTrans);
+  const size_type x_length   = size_type(transposed ? m : n);
+  const size_type y_length   = size_type(transposed ? n : m);
+
+  viennacl::vector_base<double> v1(x, x_length, size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  viennacl::vector_base<double> v2(y, y_length, size_type(offy), difference_type(incy), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  // NOTE(review): the internal sizes are (m, lda) for both storage orders - confirm this is intended for column-major input with lda != m.
+  viennacl::matrix_base<double> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                                    size_type(m), size_type(offA_row), difference_type(incA_row), size_type(m),
+                                    size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == ViennaCLRowMajor);
+
+  // Scale y first, then accumulate the scaled product into it.
+  v2 *= beta;
+  if (transposed)
+    v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1);
+  else
+    v2 += alpha * viennacl::linalg::prod(mat, v1);
+
+  return ViennaCLSuccess;
+}
+
+
+
+// xTRSV
+
+/** @brief Single-precision triangular solve on OpenCL buffers.
+ *
+ * Wraps A as an n-by-n matrix and x as a vector with n entries, then forwards op(A) and x to
+ * viennacl::linalg::inplace_solve with the solver tag selected by uplo and diag.
+ *
+ * @param backend   Backend object; backend->opencl_backend.context_id selects the OpenCL context
+ * @param uplo      ViennaCLUpper selects an upper-triangular tag; any other value selects a lower-triangular tag
+ * @param order     Compared against ViennaCLRowMajor to set the storage-layout flag of the matrix wrapper
+ * @param transA    ViennaCLTrans solves with the transpose of A; any other value solves with A as-is
+ * @param diag      ViennaCLUnit selects a unit-diagonal tag; any other value selects the non-unit tag
+ * @param n         Number of rows and columns of A, and number of entries of x
+ * @param A         OpenCL buffer holding the matrix
+ * @param offA_row  Start offset of A in the first (row) dimension
+ * @param offA_col  Start offset of A in the second (column) dimension
+ * @param incA_row  Stride of A in the first (row) dimension
+ * @param incA_col  Stride of A in the second (column) dimension
+ * @param lda       Forwarded as the internal size of the second dimension of the matrix wrapper
+ * @param x         OpenCL buffer holding the vector handed to the in-place solver
+ * @param offx      Start offset of x
+ * @param incx      Stride of x
+ * @return ViennaCLSuccess (the function has no other return path)
+ */
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLStrsv(ViennaCLBackend backend,
+                                                              ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag,
+                                                              ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                              cl_mem x, ViennaCLInt offx, ViennaCLInt incx)
+{
+  typedef viennacl::vector_base<float>::size_type           index_type;
+  typedef viennacl::vector_base<float>::size_type           stride_type;
+
+  viennacl::vector_base<float> rhs(x, index_type(n), index_type(offx), stride_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  viennacl::matrix_base<float> tri(A, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                                   index_type(n), index_type(offA_row), stride_type(incA_row), index_type(n),
+                                   index_type(n), index_type(offA_col), stride_type(incA_col), index_type(lda), order == ViennaCLRowMajor);
+
+  // Each flag has exactly one "special" value; everything else falls into the default case.
+  const bool use_transpose = (transA == ViennaCLTrans);
+  const bool is_upper      = (uplo == ViennaCLUpper);
+  const bool unit_diagonal = (diag == ViennaCLUnit);
+
+  if (is_upper && unit_diagonal)
+  {
+    if (use_transpose)
+      viennacl::linalg::inplace_solve(viennacl::trans(tri), rhs, viennacl::linalg::unit_upper_tag());
+    else
+      viennacl::linalg::inplace_solve(tri, rhs, viennacl::linalg::unit_upper_tag());
+  }
+  else if (is_upper)
+  {
+    if (use_transpose)
+      viennacl::linalg::inplace_solve(viennacl::trans(tri), rhs, viennacl::linalg::upper_tag());
+    else
+      viennacl::linalg::inplace_solve(tri, rhs, viennacl::linalg::upper_tag());
+  }
+  else if (unit_diagonal)
+  {
+    if (use_transpose)
+      viennacl::linalg::inplace_solve(viennacl::trans(tri), rhs, viennacl::linalg::unit_lower_tag());
+    else
+      viennacl::linalg::inplace_solve(tri, rhs, viennacl::linalg::unit_lower_tag());
+  }
+  else
+  {
+    if (use_transpose)
+      viennacl::linalg::inplace_solve(viennacl::trans(tri), rhs, viennacl::linalg::lower_tag());
+    else
+      viennacl::linalg::inplace_solve(tri, rhs, viennacl::linalg::lower_tag());
+  }
+
+  return ViennaCLSuccess;
+}
+
+/** @brief Double-precision triangular solve on OpenCL buffers.
+ *
+ * Wraps A as an n-by-n matrix and x as a vector with n entries, then forwards op(A) and x to
+ * viennacl::linalg::inplace_solve with the solver tag selected by uplo and diag.
+ *
+ * @param backend   Backend object; backend->opencl_backend.context_id selects the OpenCL context
+ * @param uplo      ViennaCLUpper selects an upper-triangular tag; any other value selects a lower-triangular tag
+ * @param order     Compared against ViennaCLRowMajor to set the storage-layout flag of the matrix wrapper
+ * @param transA    ViennaCLTrans solves with the transpose of A; any other value solves with A as-is
+ * @param diag      ViennaCLUnit selects a unit-diagonal tag; any other value selects the non-unit tag
+ * @param n         Number of rows and columns of A, and number of entries of x
+ * @param A         OpenCL buffer holding the matrix
+ * @param offA_row  Start offset of A in the first (row) dimension
+ * @param offA_col  Start offset of A in the second (column) dimension
+ * @param incA_row  Stride of A in the first (row) dimension
+ * @param incA_col  Stride of A in the second (column) dimension
+ * @param lda       Forwarded as the internal size of the second dimension of the matrix wrapper
+ * @param x         OpenCL buffer holding the vector handed to the in-place solver
+ * @param offx      Start offset of x
+ * @param incx      Stride of x
+ * @return ViennaCLSuccess (the function has no other return path)
+ */
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDtrsv(ViennaCLBackend backend,
+                                                              ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag,
+                                                              ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                              cl_mem x, ViennaCLInt offx, ViennaCLInt incx)
+{
+  typedef viennacl::vector_base<double>::size_type           index_type;
+  typedef viennacl::vector_base<double>::size_type           stride_type;
+
+  viennacl::vector_base<double> rhs(x, index_type(n), index_type(offx), stride_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  viennacl::matrix_base<double> tri(A, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                                    index_type(n), index_type(offA_row), stride_type(incA_row), index_type(n),
+                                    index_type(n), index_type(offA_col), stride_type(incA_col), index_type(lda), order == ViennaCLRowMajor);
+
+  // Each flag has exactly one "special" value; everything else falls into the default case.
+  const bool use_transpose = (transA == ViennaCLTrans);
+  const bool is_upper      = (uplo == ViennaCLUpper);
+  const bool unit_diagonal = (diag == ViennaCLUnit);
+
+  if (is_upper && unit_diagonal)
+  {
+    if (use_transpose)
+      viennacl::linalg::inplace_solve(viennacl::trans(tri), rhs, viennacl::linalg::unit_upper_tag());
+    else
+      viennacl::linalg::inplace_solve(tri, rhs, viennacl::linalg::unit_upper_tag());
+  }
+  else if (is_upper)
+  {
+    if (use_transpose)
+      viennacl::linalg::inplace_solve(viennacl::trans(tri), rhs, viennacl::linalg::upper_tag());
+    else
+      viennacl::linalg::inplace_solve(tri, rhs, viennacl::linalg::upper_tag());
+  }
+  else if (unit_diagonal)
+  {
+    if (use_transpose)
+      viennacl::linalg::inplace_solve(viennacl::trans(tri), rhs, viennacl::linalg::unit_lower_tag());
+    else
+      viennacl::linalg::inplace_solve(tri, rhs, viennacl::linalg::unit_lower_tag());
+  }
+  else
+  {
+    if (use_transpose)
+      viennacl::linalg::inplace_solve(viennacl::trans(tri), rhs, viennacl::linalg::lower_tag());
+    else
+      viennacl::linalg::inplace_solve(tri, rhs, viennacl::linalg::lower_tag());
+  }
+
+  return ViennaCLSuccess;
+}
+
+
+
+// xGER
+
+/** @brief Single-precision rank-1 update on OpenCL buffers: A <- alpha * x * y^T + A
+ *
+ * A is wrapped as an m-by-n matrix, so x is wrapped with m entries and y with n entries;
+ * this makes the outer product of x and y match the dimensions of A.
+ *
+ * @param backend   Backend object; backend->opencl_backend.context_id selects the OpenCL context for all wrappers
+ * @param order     Compared against ViennaCLRowMajor to set the storage-layout flag of the matrix wrapper
+ * @param m         Number of rows of A and number of entries of x
+ * @param n         Number of columns of A and number of entries of y
+ * @param alpha     Scaling factor applied to the outer product
+ * @param x         OpenCL buffer holding the first vector
+ * @param offx      Start offset of x
+ * @param incx      Stride of x
+ * @param y         OpenCL buffer holding the second vector
+ * @param offy      Start offset of y
+ * @param incy      Stride of y
+ * @param A         OpenCL buffer holding the matrix that is updated
+ * @param offA_row  Start offset of A in the first (row) dimension
+ * @param offA_col  Start offset of A in the second (column) dimension
+ * @param incA_row  Stride of A in the first (row) dimension
+ * @param incA_col  Stride of A in the second (column) dimension
+ * @param lda       Forwarded as the internal size of the second dimension of the matrix wrapper
+ * @return ViennaCLSuccess (the function has no other return path)
+ */
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSger(ViennaCLBackend backend,
+                                                             ViennaCLOrder order,
+                                                             ViennaCLInt m, ViennaCLInt n,
+                                                             float alpha,
+                                                             cl_mem x, ViennaCLInt offx, ViennaCLInt incx,
+                                                             cl_mem y, ViennaCLInt offy, ViennaCLInt incy,
+                                                             cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda)
+{
+  typedef viennacl::vector_base<float>::size_type           size_type;
+  typedef viennacl::vector_base<float>::size_type           difference_type;
+
+  // x spans the rows of A (m entries) and y spans the columns (n entries).
+  // The lengths used to be swapped, which made the outer product n-by-m and thus inconsistent with A for m != n.
+  viennacl::vector_base<float> v1(x, size_type(m), size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  viennacl::vector_base<float> v2(y, size_type(n), size_type(offy), difference_type(incy), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  viennacl::matrix_base<float> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                                   size_type(m), size_type(offA_row), difference_type(incA_row), size_type(m),
+                                   size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == ViennaCLRowMajor);
+
+  mat += alpha * viennacl::linalg::outer_prod(v1, v2);
+
+  return ViennaCLSuccess;
+}
+
+/** @brief Double-precision rank-1 update on OpenCL buffers: A <- alpha * x * y^T + A
+ *
+ * A is wrapped as an m-by-n matrix, so x is wrapped with m entries and y with n entries;
+ * this makes the outer product of x and y match the dimensions of A.
+ *
+ * @param backend   Backend object; backend->opencl_backend.context_id selects the OpenCL context for all wrappers
+ * @param order     Compared against ViennaCLRowMajor to set the storage-layout flag of the matrix wrapper
+ * @param m         Number of rows of A and number of entries of x
+ * @param n         Number of columns of A and number of entries of y
+ * @param alpha     Scaling factor applied to the outer product
+ * @param x         OpenCL buffer holding the first vector
+ * @param offx      Start offset of x
+ * @param incx      Stride of x
+ * @param y         OpenCL buffer holding the second vector
+ * @param offy      Start offset of y
+ * @param incy      Stride of y
+ * @param A         OpenCL buffer holding the matrix that is updated
+ * @param offA_row  Start offset of A in the first (row) dimension
+ * @param offA_col  Start offset of A in the second (column) dimension
+ * @param incA_row  Stride of A in the first (row) dimension
+ * @param incA_col  Stride of A in the second (column) dimension
+ * @param lda       Forwarded as the internal size of the second dimension of the matrix wrapper
+ * @return ViennaCLSuccess (the function has no other return path)
+ */
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDger(ViennaCLBackend backend,
+                                                             ViennaCLOrder order,
+                                                             ViennaCLInt m, ViennaCLInt n,
+                                                             double alpha,
+                                                             cl_mem x, ViennaCLInt offx, ViennaCLInt incx,
+                                                             cl_mem y, ViennaCLInt offy, ViennaCLInt incy,
+                                                             cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda)
+{
+  typedef viennacl::vector_base<double>::size_type           size_type;
+  typedef viennacl::vector_base<double>::size_type           difference_type;
+
+  // x spans the rows of A (m entries) and y spans the columns (n entries).
+  // The lengths used to be swapped, which made the outer product n-by-m and thus inconsistent with A for m != n.
+  viennacl::vector_base<double> v1(x, size_type(m), size_type(offx), difference_type(incx), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  viennacl::vector_base<double> v2(y, size_type(n), size_type(offy), difference_type(incy), viennacl::ocl::get_context(backend->opencl_backend.context_id));
+  viennacl::matrix_base<double> mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                                    size_type(m), size_type(offA_row), difference_type(incA_row), size_type(m),
+                                    size_type(n), size_type(offA_col), difference_type(incA_col), size_type(lda), order == ViennaCLRowMajor);
+
+  mat += alpha * viennacl::linalg::outer_prod(v1, v2);
+
+  return ViennaCLSuccess;
+}
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cpp b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cpp
new file mode 100644
index 0000000..bb6e03e
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cpp
@@ -0,0 +1,272 @@
+/* =========================================================================
+   Copyright (c) 2010-2014, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the PDF manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+// include necessary system headers
+#include <iostream>
+
+#include "viennacl.hpp"
+#include "viennacl_private.hpp"
+
+#include "init_matrix.hpp"
+
+//include basic scalar and vector types of ViennaCL
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/prod.hpp"
+
+// xGEMM
+
+/** @brief Matrix-matrix product on ViennaCL matrix descriptors, forwarded to viennacl::linalg::prod_impl with op(A), op(B), C, alpha and beta.
+ *
+ * The element type is selected by A->precision only; op(.) is the transpose when the descriptor's trans field is
+ * ViennaCLTrans and the matrix itself when it is ViennaCLNoTrans.
+ *
+ * @param alpha  Host scalar; value_float or value_double is read depending on A->precision
+ * @param A      Descriptor of the first factor
+ * @param B      Descriptor of the second factor
+ * @param beta   Host scalar; value_float or value_double is read depending on A->precision
+ * @param C      Descriptor of the result matrix
+ * @return ViennaCLSuccess after the product was issued; ViennaCLGenericFailure if init_matrix() fails for any operand,
+ *         if A->precision is neither ViennaCLFloat nor ViennaCLDouble, or if a trans field holds an unrecognized value
+ */
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLMatrix B, ViennaCLHostScalar beta, ViennaCLMatrix C)
+{
+  viennacl::backend::mem_handle A_handle;
+  viennacl::backend::mem_handle B_handle;
+  viennacl::backend::mem_handle C_handle;
+
+  // Handles are set up in the order A, B, C; the first failure aborts the call.
+  if (   init_matrix(A_handle, A) != ViennaCLSuccess
+      || init_matrix(B_handle, B) != ViennaCLSuccess
+      || init_matrix(C_handle, C) != ViennaCLSuccess)
+    return ViennaCLGenericFailure;
+
+  if (A->precision == ViennaCLFloat)
+  {
+    typedef viennacl::matrix_base<float>::size_type           index_type;
+    typedef viennacl::matrix_base<float>::size_type           stride_type;
+
+    viennacl::matrix_base<float> lhs(A_handle,
+                                     index_type(A->size1), index_type(A->start1), stride_type(A->stride1), index_type(A->internal_size1),
+                                     index_type(A->size2), index_type(A->start2), stride_type(A->stride2), index_type(A->internal_size2), A->order == ViennaCLRowMajor);
+    viennacl::matrix_base<float> rhs(B_handle,
+                                     index_type(B->size1), index_type(B->start1), stride_type(B->stride1), index_type(B->internal_size1),
+                                     index_type(B->size2), index_type(B->start2), stride_type(B->stride2), index_type(B->internal_size2), B->order == ViennaCLRowMajor);
+    viennacl::matrix_base<float> result(C_handle,
+                                        index_type(C->size1), index_type(C->start1), stride_type(C->stride1), index_type(C->internal_size1),
+                                        index_type(C->size2), index_type(C->start2), stride_type(C->stride2), index_type(C->internal_size2), C->order == ViennaCLRowMajor);
+
+    if (A->trans == ViennaCLTrans)
+    {
+      if (B->trans == ViennaCLTrans)
+        viennacl::linalg::prod_impl(viennacl::trans(lhs), viennacl::trans(rhs), result, alpha->value_float, beta->value_float);
+      else if (B->trans == ViennaCLNoTrans)
+        viennacl::linalg::prod_impl(viennacl::trans(lhs), rhs, result, alpha->value_float, beta->value_float);
+      else
+        return ViennaCLGenericFailure;
+    }
+    else if (A->trans == ViennaCLNoTrans)
+    {
+      if (B->trans == ViennaCLTrans)
+        viennacl::linalg::prod_impl(lhs, viennacl::trans(rhs), result, alpha->value_float, beta->value_float);
+      else if (B->trans == ViennaCLNoTrans)
+        viennacl::linalg::prod_impl(lhs, rhs, result, alpha->value_float, beta->value_float);
+      else
+        return ViennaCLGenericFailure;
+    }
+    else
+      return ViennaCLGenericFailure;
+
+    return ViennaCLSuccess;
+  }
+
+  if (A->precision == ViennaCLDouble)
+  {
+    typedef viennacl::matrix_base<double>::size_type           index_type;
+    typedef viennacl::matrix_base<double>::size_type           stride_type;
+
+    viennacl::matrix_base<double> lhs(A_handle,
+                                      index_type(A->size1), index_type(A->start1), stride_type(A->stride1), index_type(A->internal_size1),
+                                      index_type(A->size2), index_type(A->start2), stride_type(A->stride2), index_type(A->internal_size2), A->order == ViennaCLRowMajor);
+    viennacl::matrix_base<double> rhs(B_handle,
+                                      index_type(B->size1), index_type(B->start1), stride_type(B->stride1), index_type(B->internal_size1),
+                                      index_type(B->size2), index_type(B->start2), stride_type(B->stride2), index_type(B->internal_size2), B->order == ViennaCLRowMajor);
+    viennacl::matrix_base<double> result(C_handle,
+                                         index_type(C->size1), index_type(C->start1), stride_type(C->stride1), index_type(C->internal_size1),
+                                         index_type(C->size2), index_type(C->start2), stride_type(C->stride2), index_type(C->internal_size2), C->order == ViennaCLRowMajor);
+
+    if (A->trans == ViennaCLTrans)
+    {
+      if (B->trans == ViennaCLTrans)
+        viennacl::linalg::prod_impl(viennacl::trans(lhs), viennacl::trans(rhs), result, alpha->value_double, beta->value_double);
+      else if (B->trans == ViennaCLNoTrans)
+        viennacl::linalg::prod_impl(viennacl::trans(lhs), rhs, result, alpha->value_double, beta->value_double);
+      else
+        return ViennaCLGenericFailure;
+    }
+    else if (A->trans == ViennaCLNoTrans)
+    {
+      if (B->trans == ViennaCLTrans)
+        viennacl::linalg::prod_impl(lhs, viennacl::trans(rhs), result, alpha->value_double, beta->value_double);
+      else if (B->trans == ViennaCLNoTrans)
+        viennacl::linalg::prod_impl(lhs, rhs, result, alpha->value_double, beta->value_double);
+      else
+        return ViennaCLGenericFailure;
+    }
+    else
+      return ViennaCLGenericFailure;
+
+    return ViennaCLSuccess;
+  }
+
+  // Unsupported precision.
+  return ViennaCLGenericFailure;
+}
+
+
+// xTRSM
+
+namespace
+{
+  /** @brief Issues one in-place triangular solve for a fixed solver tag, choosing op(A) and op(B) from the descriptors' trans fields.
+   *
+   * @return ViennaCLSuccess after the solve was issued, ViennaCLGenericFailure if either trans field is neither ViennaCLTrans nor ViennaCLNoTrans
+   */
+  template<typename NumericT, typename SolverTagT>
+  ViennaCLStatus trsm_apply(ViennaCLMatrix A, viennacl::matrix_base<NumericT> & mat_A,
+                            ViennaCLMatrix B, viennacl::matrix_base<NumericT> & mat_B,
+                            SolverTagT tag)
+  {
+    if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans)
+      viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), tag);
+    else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans)
+      viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, tag);
+    else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans)
+      viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), tag);   // A is NOT transposed here (was a copy of the Trans/Trans case)
+    else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans)
+      viennacl::linalg::inplace_solve(mat_A, mat_B, tag);
+    else
+      return ViennaCLGenericFailure;   // unknown transpose flag: report it instead of silently doing nothing
+
+    return ViennaCLSuccess;
+  }
+
+  /** @brief Wraps both operands as matrix_base<NumericT> and dispatches on (uplo, diag) to the matching ViennaCL solver tag.
+   *
+   * @return Result of trsm_apply(), or ViennaCLGenericFailure for an unrecognized uplo/diag combination
+   */
+  template<typename NumericT>
+  ViennaCLStatus trsm_typed(viennacl::backend::mem_handle & A_handle, ViennaCLMatrix A,
+                            viennacl::backend::mem_handle & B_handle, ViennaCLMatrix B,
+                            ViennaCLUplo uplo, ViennaCLDiag diag)
+  {
+    typedef typename viennacl::matrix_base<NumericT>::size_type           size_type;
+    typedef typename viennacl::matrix_base<NumericT>::size_type           difference_type;
+
+    viennacl::matrix_base<NumericT> mat_A(A_handle,
+                                          size_type(A->size1), size_type(A->start1), difference_type(A->stride1), size_type(A->internal_size1),
+                                          size_type(A->size2), size_type(A->start2), difference_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor);
+    viennacl::matrix_base<NumericT> mat_B(B_handle,
+                                          size_type(B->size1), size_type(B->start1), difference_type(B->stride1), size_type(B->internal_size1),
+                                          size_type(B->size2), size_type(B->start2), difference_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor);
+
+    if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
+      return trsm_apply(A, mat_A, B, mat_B, viennacl::linalg::upper_tag());
+    if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
+      return trsm_apply(A, mat_A, B, mat_B, viennacl::linalg::unit_upper_tag());
+    if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
+      return trsm_apply(A, mat_A, B, mat_B, viennacl::linalg::lower_tag());
+    if (uplo == ViennaCLLower && diag == ViennaCLUnit)
+      return trsm_apply(A, mat_A, B, mat_B, viennacl::linalg::unit_lower_tag());
+
+    return ViennaCLGenericFailure;
+  }
+}
+
+/** @brief Triangular solve with a matrix right-hand side: forwards op(A) and op(B) to viennacl::linalg::inplace_solve.
+ *
+ * The element type is selected by A->precision only. op(.) is the transpose when the descriptor's trans field is
+ * ViennaCLTrans and the matrix itself when it is ViennaCLNoTrans.
+ *
+ * @param A     Descriptor of the triangular system matrix
+ * @param uplo  ViennaCLUpper or ViennaCLLower, selects an upper- or lower-triangular solver tag
+ * @param diag  ViennaCLUnit or ViennaCLNonUnit, selects the unit-diagonal or regular variant of the tag
+ * @param B     Descriptor of the right-hand side matrix handed to the in-place solver
+ * @return ViennaCLSuccess after the solve was issued; ViennaCLGenericFailure if init_matrix() fails for an operand,
+ *         if A->precision is neither ViennaCLFloat nor ViennaCLDouble, or if uplo, diag or a trans field is unrecognized
+ */
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsm(ViennaCLMatrix A, ViennaCLUplo uplo, ViennaCLDiag diag, ViennaCLMatrix B)
+{
+  viennacl::backend::mem_handle A_handle;
+  viennacl::backend::mem_handle B_handle;
+
+  if (init_matrix(A_handle, A) != ViennaCLSuccess)
+    return ViennaCLGenericFailure;
+
+  if (init_matrix(B_handle, B) != ViennaCLSuccess)
+    return ViennaCLGenericFailure;
+
+  switch (A->precision)
+  {
+    case ViennaCLFloat:
+      return trsm_typed<float>(A_handle, A, B_handle, B, uplo, diag);
+
+    case ViennaCLDouble:
+      return trsm_typed<double>(A_handle, A, B_handle, B, uplo, diag);
+
+    default:
+      return  ViennaCLGenericFailure;
+  }
+}
+
+
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cu
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cu
new file mode 100644
index 0000000..bb6e03e
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.cu
@@ -0,0 +1,272 @@
+/* =========================================================================
+   Copyright (c) 2010-2014, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the PDF manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+// include necessary system headers
+#include <iostream>
+
+#include "viennacl.hpp"
+#include "viennacl_private.hpp"
+
+#include "init_matrix.hpp"
+
+//include basic scalar and vector types of ViennaCL
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/prod.hpp"
+
+// xGEMM
+
+// Generic matrix-matrix product entry point (operands are ViennaCLMatrix descriptors, not raw pointers).
+// A, B and C are wrapped as viennacl::matrix_base views whose element type is selected by
+// A->precision alone (B->precision and C->precision are not inspected here). op(A), op(B), C,
+// alpha and beta are then passed to viennacl::linalg::prod_impl(), op() following each operand's
+// `trans` field. Presumably C = alpha * op(A) * op(B) + beta * C -- confirm against prod_impl().
+// Returns ViennaCLGenericFailure if init_matrix() fails for A, B or C (tried in that order), if
+// A->precision is neither ViennaCLFloat nor ViennaCLDouble, or if a transpose flag is unrecognised.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLMatrix B, ViennaCLHostScalar beta, ViennaCLMatrix C)
+{
+  viennacl::backend::mem_handle A_handle, B_handle, C_handle;
+  if (   init_matrix(A_handle, A) != ViennaCLSuccess
+      || init_matrix(B_handle, B) != ViennaCLSuccess
+      || init_matrix(C_handle, C) != ViennaCLSuccess)
+    return ViennaCLGenericFailure;
+
+  switch (A->precision)
+  {
+    case ViennaCLFloat:
+    {
+      typedef viennacl::matrix_base<float>  matrix_type;
+      typedef matrix_type::size_type        index_type;
+
+      matrix_type lhs(A_handle, index_type(A->size1), index_type(A->start1), index_type(A->stride1), index_type(A->internal_size1),
+                                index_type(A->size2), index_type(A->start2), index_type(A->stride2), index_type(A->internal_size2), A->order == ViennaCLRowMajor);
+      matrix_type rhs(B_handle, index_type(B->size1), index_type(B->start1), index_type(B->stride1), index_type(B->internal_size1),
+                                index_type(B->size2), index_type(B->start2), index_type(B->stride2), index_type(B->internal_size2), B->order == ViennaCLRowMajor);
+      matrix_type out(C_handle, index_type(C->size1), index_type(C->start1), index_type(C->stride1), index_type(C->internal_size1),
+                                index_type(C->size2), index_type(C->start2), index_type(C->stride2), index_type(C->internal_size2), C->order == ViennaCLRowMajor);
+
+      const bool lhs_trans = (A->trans == ViennaCLTrans), lhs_plain = (A->trans == ViennaCLNoTrans);
+      const bool rhs_trans = (B->trans == ViennaCLTrans), rhs_plain = (B->trans == ViennaCLNoTrans);
+      if (!(lhs_trans || lhs_plain) || !(rhs_trans || rhs_plain))
+        return ViennaCLGenericFailure;  // unrecognised transpose flag: no product is computed
+      if (lhs_trans)
+      {
+        if (rhs_trans) viennacl::linalg::prod_impl(viennacl::trans(lhs), viennacl::trans(rhs), out, alpha->value_float, beta->value_float);
+        else           viennacl::linalg::prod_impl(viennacl::trans(lhs), rhs, out, alpha->value_float, beta->value_float);
+      }
+      else
+      {
+        if (rhs_trans) viennacl::linalg::prod_impl(lhs, viennacl::trans(rhs), out, alpha->value_float, beta->value_float);
+        else           viennacl::linalg::prod_impl(lhs, rhs, out, alpha->value_float, beta->value_float);
+      }
+      return ViennaCLSuccess;
+    }
+
+    case ViennaCLDouble:
+    {
+      typedef viennacl::matrix_base<double>  matrix_type;
+      typedef matrix_type::size_type         index_type;
+
+      matrix_type lhs(A_handle, index_type(A->size1), index_type(A->start1), index_type(A->stride1), index_type(A->internal_size1),
+                                index_type(A->size2), index_type(A->start2), index_type(A->stride2), index_type(A->internal_size2), A->order == ViennaCLRowMajor);
+      matrix_type rhs(B_handle, index_type(B->size1), index_type(B->start1), index_type(B->stride1), index_type(B->internal_size1),
+                                index_type(B->size2), index_type(B->start2), index_type(B->stride2), index_type(B->internal_size2), B->order == ViennaCLRowMajor);
+      matrix_type out(C_handle, index_type(C->size1), index_type(C->start1), index_type(C->stride1), index_type(C->internal_size1),
+                                index_type(C->size2), index_type(C->start2), index_type(C->stride2), index_type(C->internal_size2), C->order == ViennaCLRowMajor);
+
+      const bool lhs_trans = (A->trans == ViennaCLTrans), lhs_plain = (A->trans == ViennaCLNoTrans);
+      const bool rhs_trans = (B->trans == ViennaCLTrans), rhs_plain = (B->trans == ViennaCLNoTrans);
+      if (!(lhs_trans || lhs_plain) || !(rhs_trans || rhs_plain))
+        return ViennaCLGenericFailure;  // unrecognised transpose flag: no product is computed
+      if (lhs_trans)
+      {
+        if (rhs_trans) viennacl::linalg::prod_impl(viennacl::trans(lhs), viennacl::trans(rhs), out, alpha->value_double, beta->value_double);
+        else           viennacl::linalg::prod_impl(viennacl::trans(lhs), rhs, out, alpha->value_double, beta->value_double);
+      }
+      else
+      {
+        if (rhs_trans) viennacl::linalg::prod_impl(lhs, viennacl::trans(rhs), out, alpha->value_double, beta->value_double);
+        else           viennacl::linalg::prod_impl(lhs, rhs, out, alpha->value_double, beta->value_double);
+      }
+      return ViennaCLSuccess;
+    }
+
+    default:
+      return ViennaCLGenericFailure;
+  }
+}
+
+
+// xTRSM
+
+// Generic triangular solve with a matrix of right-hand sides.
+// Wraps A and B as viennacl::matrix_base views typed by A->precision (B->precision is not
+// inspected) and calls viennacl::linalg::inplace_solve(op(A), op(B), tag), where op() follows each
+// operand's `trans` field and tag is upper/unit_upper/lower/unit_lower according to uplo and diag.
+// Presumably B is overwritten with the solution -- confirm against inplace_solve().
+// Returns ViennaCLGenericFailure if init_matrix() fails for A or B, if A->precision is neither
+// ViennaCLFloat nor ViennaCLDouble, or if a transpose, uplo or diag flag is unrecognised (an
+// unknown transpose flag used to fall through and report ViennaCLSuccess without solving anything).
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsm(ViennaCLMatrix A, ViennaCLUplo uplo, ViennaCLDiag diag, ViennaCLMatrix B)
+{
+  viennacl::backend::mem_handle A_handle, B_handle;
+  if (   init_matrix(A_handle, A) != ViennaCLSuccess
+      || init_matrix(B_handle, B) != ViennaCLSuccess)
+    return ViennaCLGenericFailure;
+
+  switch (A->precision)
+  {
+    case ViennaCLFloat:
+    {
+      typedef viennacl::matrix_base<float>::size_type           size_type;
+
+      viennacl::matrix_base<float> mat_A(A_handle, size_type(A->size1), size_type(A->start1), size_type(A->stride1), size_type(A->internal_size1),
+                                                   size_type(A->size2), size_type(A->start2), size_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor);
+      viennacl::matrix_base<float> mat_B(B_handle, size_type(B->size1), size_type(B->start1), size_type(B->stride1), size_type(B->internal_size1),
+                                                   size_type(B->size2), size_type(B->start2), size_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor);
+
+      if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans)
+      {
+        if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag());
+        else if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag());
+        else
+          return ViennaCLGenericFailure;
+      }
+      else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans)
+      {
+        if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::upper_tag());
+        else if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_upper_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::lower_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_lower_tag());
+        else
+          return ViennaCLGenericFailure;
+      }
+      else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans)  // A is used as stored; only B is transposed
+      {
+        if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), viennacl::linalg::upper_tag());
+        else if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), viennacl::linalg::lower_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag());
+        else
+          return ViennaCLGenericFailure;
+      }
+      else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans)
+      {
+        if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::upper_tag());
+        else if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_upper_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::lower_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_lower_tag());
+        else
+          return ViennaCLGenericFailure;
+      }
+      else
+        return ViennaCLGenericFailure;  // unrecognised transpose flag: nothing was solved
+      return ViennaCLSuccess;
+    }
+    case ViennaCLDouble:
+    {
+      typedef viennacl::matrix_base<double>::size_type           size_type;
+
+      viennacl::matrix_base<double> mat_A(A_handle, size_type(A->size1), size_type(A->start1), size_type(A->stride1), size_type(A->internal_size1),
+                                                    size_type(A->size2), size_type(A->start2), size_type(A->stride2), size_type(A->internal_size2), A->order == ViennaCLRowMajor);
+      viennacl::matrix_base<double> mat_B(B_handle, size_type(B->size1), size_type(B->start1), size_type(B->stride1), size_type(B->internal_size1),
+                                                    size_type(B->size2), size_type(B->start2), size_type(B->stride2), size_type(B->internal_size2), B->order == ViennaCLRowMajor);
+
+      if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans)
+      {
+        if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::upper_tag());
+        else if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::lower_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag());
+        else
+          return ViennaCLGenericFailure;
+      }
+      else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans)
+      {
+        if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::upper_tag());
+        else if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_upper_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::lower_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, viennacl::linalg::unit_lower_tag());
+        else
+          return ViennaCLGenericFailure;
+      }
+      else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans)  // A is used as stored; only B is transposed
+      {
+        if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), viennacl::linalg::upper_tag());
+        else if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), viennacl::linalg::unit_upper_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), viennacl::linalg::lower_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), viennacl::linalg::unit_lower_tag());
+        else
+          return ViennaCLGenericFailure;
+      }
+      else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans)
+      {
+        if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::upper_tag());
+        else if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_upper_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
+          viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::lower_tag());
+        else if (uplo == ViennaCLLower && diag == ViennaCLUnit)
+          viennacl::linalg::inplace_solve(mat_A, mat_B, viennacl::linalg::unit_lower_tag());
+        else
+          return ViennaCLGenericFailure;
+      }
+      else
+        return ViennaCLGenericFailure;  // unrecognised transpose flag: nothing was solved
+      return ViennaCLSuccess;
+    }
+
+    default:
+      return ViennaCLGenericFailure;
+  }
+}
+
+
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.hpp
new file mode 100644
index 0000000..cfcc034
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3.hpp
@@ -0,0 +1,60 @@
+#ifndef VIENNACL_SRC_BLAS3_HPP
+#define VIENNACL_SRC_BLAS3_HPP
+
+/* =========================================================================
+   Copyright (c) 2010-2014, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the PDF manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+// include necessary system headers
+#include <iostream>
+
+#include "viennacl.hpp"
+#include "viennacl_private.hpp"
+
+//include basic scalar and vector types of ViennaCL
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/prod.hpp"
+
+namespace detail
+{
+  // Calls viennacl::linalg::prod_impl(op(A), op(B), C, alpha, beta), where op(X) is viennacl::trans(X)
+  // for ViennaCLTrans and X itself for ViennaCLNoTrans. NOTE(review): any other flag value makes this
+  // a silent no-op, and the void return type gives callers no way to notice.
+  template <typename ScalarType, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC>
+  void gemm_dispatch(ScalarType alpha, MatrixTypeA const & A, ViennaCLTranspose transA,
+                     MatrixTypeB const & B, ViennaCLTranspose transB, ScalarType beta, MatrixTypeC & C)
+  {
+    const bool lhs_trans = (transA == ViennaCLTrans), lhs_plain = (transA == ViennaCLNoTrans);
+    const bool rhs_trans = (transB == ViennaCLTrans), rhs_plain = (transB == ViennaCLNoTrans);
+    if (!(lhs_trans || lhs_plain) || !(rhs_trans || rhs_plain))
+      return;  // unrecognised flag: C is left untouched
+    if (lhs_trans)
+    {
+      if (rhs_trans) viennacl::linalg::prod_impl(viennacl::trans(A), viennacl::trans(B), C, alpha, beta);
+      else           viennacl::linalg::prod_impl(viennacl::trans(A), B, C, alpha, beta);
+    }
+    else if (rhs_trans) viennacl::linalg::prod_impl(A, viennacl::trans(B), C, alpha, beta);
+    else                viennacl::linalg::prod_impl(A, B, C, alpha, beta);
+  }
+}
+
+
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_cuda.cu
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_cuda.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_cuda.cu
new file mode 100644
index 0000000..318593b
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_cuda.cu
@@ -0,0 +1,133 @@
+/* =========================================================================
+   Copyright (c) 2010-2014, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the PDF manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+// include necessary system headers
+#include <iostream>
+
+#include "viennacl.hpp"
+#include "viennacl_private.hpp"
+
+#include "blas3.hpp"
+
+//include basic scalar and vector types of ViennaCL
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/prod.hpp"
+
+
+#ifdef VIENNACL_WITH_CUDA
+
+
+
+//
+// xGEMM
+//
+
+namespace detail
+{
+  // Shared implementation of ViennaCLCUDASgemm() / ViennaCLCUDADgemm(): wraps the raw buffers A, B
+  // and C (tagged viennacl::CUDA_MEMORY) as matrix_base views and hands them to detail::gemm_dispatch().
+  // op(A) is m x k, op(B) is k x n and C is m x n; lda, ldb and ldc are the leading dimensions.
+  // Returns ViennaCLGenericFailure for an unrecognised transpose flag, ViennaCLSuccess otherwise.
+  template <typename NumericT>
+  ViennaCLStatus ViennaCLCUDAgemm_impl(ViennaCLBackend /*backend*/,
+                                       ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                       ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                       ViennaCLOrder orderC,
+                                       ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                       NumericT alpha,
+                                       NumericT *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                       NumericT *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                       NumericT beta,
+                                       NumericT *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+  {
+    if ((transA != ViennaCLTrans && transA != ViennaCLNoTrans) || (transB != ViennaCLTrans && transB != ViennaCLNoTrans))
+      return ViennaCLGenericFailure;  // gemm_dispatch() would silently skip the product
+    // Extents of A and B as stored in memory, i.e. before op() is applied.
+    ViennaCLInt A_size1 = (transA == ViennaCLTrans) ? k : m;
+    ViennaCLInt A_size2 = (transA == ViennaCLTrans) ? m : k;
+    ViennaCLInt B_size1 = (transB == ViennaCLTrans) ? n : k;
+    ViennaCLInt B_size2 = (transB == ViennaCLTrans) ? k : n;
+    bool A_row_major = (orderA == ViennaCLRowMajor), B_row_major = (orderB == ViennaCLRowMajor), C_row_major = (orderC == ViennaCLRowMajor);
+
+    // Internal sizes: the leading dimension pads the fastest-running index, the stored extent covers the other one.
+    viennacl::matrix_base<NumericT> matA(A, viennacl::CUDA_MEMORY,
+                                         A_size1, offA_row, incA_row, A_row_major ? A_size1 : lda,
+                                         A_size2, offA_col, incA_col, A_row_major ? lda : A_size2, A_row_major);
+    viennacl::matrix_base<NumericT> matB(B, viennacl::CUDA_MEMORY,
+                                         B_size1, offB_row, incB_row, B_row_major ? B_size1 : ldb,
+                                         B_size2, offB_col, incB_col, B_row_major ? ldb : B_size2, B_row_major);
+    viennacl::matrix_base<NumericT> matC(C, viennacl::CUDA_MEMORY,
+                                         m, offC_row, incC_row, C_row_major ? m : ldc,
+                                         n, offC_col, incC_col, C_row_major ? ldc : n, C_row_major);
+
+    detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC);
+    return ViennaCLSuccess;
+  }
+}
+
+
+// Single-precision (float) GEMM entry point for the CUDA backend.
+// Thin exported wrapper: every argument is forwarded unchanged to
+// detail::ViennaCLCUDAgemm_impl<float>(), whose status is returned as-is.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemm(
+    ViennaCLBackend backend,
+    ViennaCLOrder orderA, ViennaCLTranspose transA,
+    ViennaCLOrder orderB, ViennaCLTranspose transB,
+    ViennaCLOrder orderC,
+    ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+    float alpha,
+    float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+    float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+    float beta,
+    float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status = detail::ViennaCLCUDAgemm_impl<float>(
+      backend, orderA, transA, orderB, transB, orderC, m, n, k,
+      alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb,
+      beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+// Double-precision (double) GEMM entry point for the CUDA backend.
+// Thin exported wrapper: every argument is forwarded unchanged to
+// detail::ViennaCLCUDAgemm_impl<double>(), whose status is returned as-is.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemm(
+    ViennaCLBackend backend,
+    ViennaCLOrder orderA, ViennaCLTranspose transA,
+    ViennaCLOrder orderB, ViennaCLTranspose transB,
+    ViennaCLOrder orderC,
+    ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+    double alpha,
+    double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+    double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+    double beta,
+    double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status = detail::ViennaCLCUDAgemm_impl<double>(
+      backend, orderA, transA, orderB, transB, orderC, m, n, k,
+      alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb,
+      beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cpp b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cpp
new file mode 100644
index 0000000..16ef310
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cpp
@@ -0,0 +1,131 @@
+/* =========================================================================
+   Copyright (c) 2010-2014, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the PDF manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+// include necessary system headers
+#include <iostream>
+
+#include "viennacl.hpp"
+#include "viennacl_private.hpp"
+
+#include "blas3.hpp"
+
+//include basic scalar and vector types of ViennaCL
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/prod.hpp"
+
+
+//
+// xGEMM
+//
+
+namespace detail
+{
+  // Shared implementation of ViennaCLHostSgemm() / ViennaCLHostDgemm(): wraps the raw buffers A, B
+  // and C (tagged viennacl::MAIN_MEMORY) as matrix_base views and hands them to detail::gemm_dispatch().
+  // op(A) is m x k, op(B) is k x n and C is m x n; lda, ldb and ldc are the leading dimensions.
+  // Returns ViennaCLGenericFailure for an unrecognised transpose flag, ViennaCLSuccess otherwise.
+  template <typename NumericT>
+  ViennaCLStatus ViennaCLHostgemm_impl(ViennaCLBackend /*backend*/,
+                                       ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                       ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                       ViennaCLOrder orderC,
+                                       ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                       NumericT alpha,
+                                       NumericT *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                       NumericT *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                       NumericT beta,
+                                       NumericT *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+  {
+    typedef typename viennacl::matrix_base<NumericT>::size_type           size_type;
+
+    if ((transA != ViennaCLTrans && transA != ViennaCLNoTrans) || (transB != ViennaCLTrans && transB != ViennaCLNoTrans))
+      return ViennaCLGenericFailure;  // gemm_dispatch() would silently skip the product
+
+    // Extents of A and B as stored in memory, i.e. before op() is applied.
+    size_type A_size1 = static_cast<size_type>((transA == ViennaCLTrans) ? k : m);
+    size_type A_size2 = static_cast<size_type>((transA == ViennaCLTrans) ? m : k);
+    size_type B_size1 = static_cast<size_type>((transB == ViennaCLTrans) ? n : k);
+    size_type B_size2 = static_cast<size_type>((transB == ViennaCLTrans) ? k : n);
+    bool A_row_major = (orderA == ViennaCLRowMajor), B_row_major = (orderB == ViennaCLRowMajor), C_row_major = (orderC == ViennaCLRowMajor);
+
+    // Internal sizes: the leading dimension pads the fastest-running index, the stored extent covers the other one.
+    viennacl::matrix_base<NumericT> matA(A, viennacl::MAIN_MEMORY,
+                                         A_size1, size_type(offA_row), size_type(incA_row), A_row_major ? A_size1 : size_type(lda),
+                                         A_size2, size_type(offA_col), size_type(incA_col), A_row_major ? size_type(lda) : A_size2, A_row_major);
+    viennacl::matrix_base<NumericT> matB(B, viennacl::MAIN_MEMORY,
+                                         B_size1, size_type(offB_row), size_type(incB_row), B_row_major ? B_size1 : size_type(ldb),
+                                         B_size2, size_type(offB_col), size_type(incB_col), B_row_major ? size_type(ldb) : B_size2, B_row_major);
+    viennacl::matrix_base<NumericT> matC(C, viennacl::MAIN_MEMORY,
+                                         size_type(m), size_type(offC_row), size_type(incC_row), size_type(C_row_major ? m : ldc),
+                                         size_type(n), size_type(offC_col), size_type(incC_col), size_type(C_row_major ? ldc : n), C_row_major);
+
+    detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC);
+    return ViennaCLSuccess;
+  }
+}
+
+
+// Single-precision (float) GEMM entry point for the host backend.
+// Thin exported wrapper: every argument is forwarded unchanged to
+// detail::ViennaCLHostgemm_impl<float>(), whose status is returned as-is.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemm(
+    ViennaCLBackend backend,
+    ViennaCLOrder orderA, ViennaCLTranspose transA,
+    ViennaCLOrder orderB, ViennaCLTranspose transB,
+    ViennaCLOrder orderC,
+    ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+    float alpha,
+    float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+    float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+    float beta,
+    float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status = detail::ViennaCLHostgemm_impl<float>(
+      backend, orderA, transA, orderB, transB, orderC, m, n, k,
+      alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb,
+      beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+// Double-precision (double) GEMM entry point for the host backend.
+// Thin exported wrapper: every argument is forwarded unchanged to
+// detail::ViennaCLHostgemm_impl<double>(), whose status is returned as-is.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemm(
+    ViennaCLBackend backend,
+    ViennaCLOrder orderA, ViennaCLTranspose transA,
+    ViennaCLOrder orderB, ViennaCLTranspose transB,
+    ViennaCLOrder orderC,
+    ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+    double alpha,
+    double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+    double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+    double beta,
+    double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status = detail::ViennaCLHostgemm_impl<double>(
+      backend, orderA, transA, orderB, transB, orderC, m, n, k,
+      alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb,
+      beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cu
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cu
new file mode 100644
index 0000000..16ef310
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_host.cu
@@ -0,0 +1,131 @@
+/* =========================================================================
+   Copyright (c) 2010-2014, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the PDF manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+// include necessary system headers
+#include <iostream>
+
+#include "viennacl.hpp"
+#include "viennacl_private.hpp"
+
+#include "blas3.hpp"
+
+//include basic scalar and vector types of ViennaCL
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/prod.hpp"
+
+
+//
+// xGEMM
+//
+
+namespace detail
+{
+  // Shared float/double GEMM driver for host memory: wraps the raw pointers A, B and C in viennacl::matrix_base
+  // views (viennacl::MAIN_MEMORY) and passes them to detail::gemm_dispatch(); returns ViennaCLSuccess afterwards.
+  template <typename NumericT>
+  ViennaCLStatus ViennaCLHostgemm_impl(ViennaCLBackend /*backend*/,
+                                       ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                       ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                       ViennaCLOrder orderC,
+                                       ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                       NumericT alpha,
+                                       NumericT *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                       NumericT *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                       NumericT beta,
+                                       NumericT *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+  {
+    typedef viennacl::matrix_base<NumericT>   matrix_view;
+    typedef typename matrix_view::size_type   size_type;
+    typedef size_type                         difference_type;  // alias of size_type, used for the inc* casts below
+
+    const bool flipA     = (transA == ViennaCLTrans);
+    const bool flipB     = (transB == ViennaCLTrans);
+    const bool rowMajorA = (orderA == ViennaCLRowMajor);
+    const bool rowMajorB = (orderB == ViennaCLRowMajor);
+    const bool rowMajorC = (orderC == ViennaCLRowMajor);
+    // Extents of the stored operands: A is (k x m) when transposed, else (m x k); B is (n x k) when transposed, else (k x n).
+    const size_type rowsA = static_cast<size_type>(flipA ? k : m);
+    const size_type colsA = static_cast<size_type>(flipA ? m : k);
+    const size_type rowsB = static_cast<size_type>(flipB ? n : k);
+    const size_type colsB = static_cast<size_type>(flipB ? k : n);
+    // NOTE(review): the 4th argument of each row below picks m/k/n or ld* from the storage order only, ignoring transA/transB - confirm intended.
+    matrix_view matA(A, viennacl::MAIN_MEMORY,
+                     rowsA, size_type(offA_row), difference_type(incA_row), size_type(rowMajorA ? m : lda),
+                     colsA, size_type(offA_col), difference_type(incA_col), size_type(rowMajorA ? lda : k), rowMajorA);
+    matrix_view matB(B, viennacl::MAIN_MEMORY,
+                     rowsB, size_type(offB_row), difference_type(incB_row), size_type(rowMajorB ? k : ldb),
+                     colsB, size_type(offB_col), difference_type(incB_col), size_type(rowMajorB ? ldb : n), rowMajorB);
+    matrix_view matC(C, viennacl::MAIN_MEMORY,
+                     size_type(m), size_type(offC_row), difference_type(incC_row), size_type(rowMajorC ? m : ldc),
+                     size_type(n), size_type(offC_col), difference_type(incC_col), size_type(rowMajorC ? ldc : n), rowMajorC);
+
+    detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC);
+    return ViennaCLSuccess;
+  }
+}
+
+
+// Single-precision (float) GEMM entry point for operands in host memory.
+// Passes every argument through unchanged to detail::ViennaCLHostgemm_impl<float>()
+// and returns the status reported by that call.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemm(ViennaCLBackend backend,
+                                                            ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                                            ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                                            ViennaCLOrder orderC,
+                                                            ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                                            float alpha,
+                                                            float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                            float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                                            float beta,
+                                                            float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status =
+      detail::ViennaCLHostgemm_impl<float>(backend, orderA, transA, orderB, transB, orderC,
+                                           m, n, k, alpha,
+                                           A, offA_row, offA_col, incA_row, incA_col, lda,
+                                           B, offB_row, offB_col, incB_row, incB_col, ldb,
+                                           beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+// Double-precision (double) GEMM entry point for operands in host memory.
+// Passes every argument through unchanged to detail::ViennaCLHostgemm_impl<double>()
+// and returns the status reported by that call.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemm(ViennaCLBackend backend,
+                                                            ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                                            ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                                            ViennaCLOrder orderC,
+                                                            ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                                            double alpha,
+                                                            double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                            double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                                            double beta,
+                                                            double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status =
+      detail::ViennaCLHostgemm_impl<double>(backend, orderA, transA, orderB, transB, orderC,
+                                            m, n, k, alpha,
+                                            A, offA_row, offA_col, incA_row, incA_col, lda,
+                                            B, offB_row, offB_col, incB_row, incB_col, ldb,
+                                            beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cpp b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cpp
new file mode 100644
index 0000000..d5e5c1e
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cpp
@@ -0,0 +1,136 @@
+/* =========================================================================
+   Copyright (c) 2010-2014, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the PDF manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+// include necessary system headers
+#include <iostream>
+
+#include "viennacl.hpp"
+#include "viennacl_private.hpp"
+
+#include "blas3.hpp"
+
+//include basic scalar and vector types of ViennaCL
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/prod.hpp"
+
+
+#ifdef VIENNACL_WITH_OPENCL
+
+
+
+//
+// xGEMM
+//
+
+namespace detail
+{
+  // Shared float/double GEMM driver for OpenCL buffers: wraps the cl_mem handles A, B and C in viennacl::matrix_base views
+  // bound to the context selected by backend->opencl_backend.context_id, then calls detail::gemm_dispatch(); returns ViennaCLSuccess.
+  template <typename NumericT>
+  ViennaCLStatus ViennaCLOpenCLgemm_impl(ViennaCLBackend backend,
+                                         ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                         ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                         ViennaCLOrder orderC,
+                                         ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                         NumericT alpha,
+                                         cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                         cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                         NumericT beta,
+                                         cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+  {
+    typedef viennacl::matrix_base<NumericT>   matrix_view;
+    typedef typename matrix_view::size_type   size_type;
+    typedef size_type                         difference_type;  // alias of size_type, used for the inc* casts below
+
+    const bool flipA     = (transA == ViennaCLTrans);
+    const bool flipB     = (transB == ViennaCLTrans);
+    const bool rowMajorA = (orderA == ViennaCLRowMajor);
+    const bool rowMajorB = (orderB == ViennaCLRowMajor);
+    const bool rowMajorC = (orderC == ViennaCLRowMajor);
+    // Extents of the stored operands: A is (k x m) when transposed, else (m x k); B is (n x k) when transposed, else (k x n).
+    const size_type rowsA = static_cast<size_type>(flipA ? k : m);
+    const size_type colsA = static_cast<size_type>(flipA ? m : k);
+    const size_type rowsB = static_cast<size_type>(flipB ? n : k);
+    const size_type colsB = static_cast<size_type>(flipB ? k : n);
+    // NOTE(review): the 4th argument of each row below picks m/k/n or ld* from the storage order only, ignoring transA/transB - confirm intended.
+    matrix_view matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                     rowsA, size_type(offA_row), difference_type(incA_row), size_type(rowMajorA ? m : lda),
+                     colsA, size_type(offA_col), difference_type(incA_col), size_type(rowMajorA ? lda : k), rowMajorA);
+    matrix_view matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                     rowsB, size_type(offB_row), difference_type(incB_row), size_type(rowMajorB ? k : ldb),
+                     colsB, size_type(offB_col), difference_type(incB_col), size_type(rowMajorB ? ldb : n), rowMajorB);
+    matrix_view matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                     size_type(m), size_type(offC_row), difference_type(incC_row), size_type(rowMajorC ? m : ldc),
+                     size_type(n), size_type(offC_col), difference_type(incC_col), size_type(rowMajorC ? ldc : n), rowMajorC);
+
+    detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC);
+    return ViennaCLSuccess;
+  }
+}
+
+
+// Single-precision (float) GEMM entry point for operands held in OpenCL buffers (cl_mem).
+// Passes every argument through unchanged to detail::ViennaCLOpenCLgemm_impl<float>()
+// and returns the status reported by that call.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemm(ViennaCLBackend backend,
+                                                              ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                                              ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                                              ViennaCLOrder orderC,
+                                                              ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                                              float alpha,
+                                                              cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                              cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                                              float beta,
+                                                              cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status =
+      detail::ViennaCLOpenCLgemm_impl<float>(backend, orderA, transA, orderB, transB, orderC,
+                                             m, n, k, alpha,
+                                             A, offA_row, offA_col, incA_row, incA_col, lda,
+                                             B, offB_row, offB_col, incB_row, incB_col, ldb,
+                                             beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+// Double-precision (double) GEMM entry point for operands held in OpenCL buffers (cl_mem).
+// Passes every argument through unchanged to detail::ViennaCLOpenCLgemm_impl<double>()
+// and returns the status reported by that call.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemm(ViennaCLBackend backend,
+                                                              ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                                              ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                                              ViennaCLOrder orderC,
+                                                              ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                                              double alpha,
+                                                              cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                              cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                                              double beta,
+                                                              cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status =
+      detail::ViennaCLOpenCLgemm_impl<double>(backend, orderA, transA, orderB, transB, orderC,
+                                              m, n, k, alpha,
+                                              A, offA_row, offA_col, incA_row, incA_col, lda,
+                                              B, offB_row, offB_col, incB_row, incB_col, ldb,
+                                              beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cu
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cu b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cu
new file mode 100644
index 0000000..d5e5c1e
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/blas3_opencl.cu
@@ -0,0 +1,136 @@
+/* =========================================================================
+   Copyright (c) 2010-2014, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the PDF manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+// include necessary system headers
+#include <iostream>
+
+#include "viennacl.hpp"
+#include "viennacl_private.hpp"
+
+#include "blas3.hpp"
+
+//include basic scalar and vector types of ViennaCL
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/prod.hpp"
+
+
+#ifdef VIENNACL_WITH_OPENCL
+
+
+
+//
+// xGEMM
+//
+
+namespace detail
+{
+  // Shared float/double GEMM driver for OpenCL buffers: wraps the cl_mem handles A, B and C in viennacl::matrix_base views
+  // bound to the context selected by backend->opencl_backend.context_id, then calls detail::gemm_dispatch(); returns ViennaCLSuccess.
+  template <typename NumericT>
+  ViennaCLStatus ViennaCLOpenCLgemm_impl(ViennaCLBackend backend,
+                                         ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                         ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                         ViennaCLOrder orderC,
+                                         ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                         NumericT alpha,
+                                         cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                         cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                         NumericT beta,
+                                         cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+  {
+    typedef viennacl::matrix_base<NumericT>   matrix_view;
+    typedef typename matrix_view::size_type   size_type;
+    typedef size_type                         difference_type;  // alias of size_type, used for the inc* casts below
+
+    const bool flipA     = (transA == ViennaCLTrans);
+    const bool flipB     = (transB == ViennaCLTrans);
+    const bool rowMajorA = (orderA == ViennaCLRowMajor);
+    const bool rowMajorB = (orderB == ViennaCLRowMajor);
+    const bool rowMajorC = (orderC == ViennaCLRowMajor);
+    // Extents of the stored operands: A is (k x m) when transposed, else (m x k); B is (n x k) when transposed, else (k x n).
+    const size_type rowsA = static_cast<size_type>(flipA ? k : m);
+    const size_type colsA = static_cast<size_type>(flipA ? m : k);
+    const size_type rowsB = static_cast<size_type>(flipB ? n : k);
+    const size_type colsB = static_cast<size_type>(flipB ? k : n);
+    // NOTE(review): the 4th argument of each row below picks m/k/n or ld* from the storage order only, ignoring transA/transB - confirm intended.
+    matrix_view matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                     rowsA, size_type(offA_row), difference_type(incA_row), size_type(rowMajorA ? m : lda),
+                     colsA, size_type(offA_col), difference_type(incA_col), size_type(rowMajorA ? lda : k), rowMajorA);
+    matrix_view matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                     rowsB, size_type(offB_row), difference_type(incB_row), size_type(rowMajorB ? k : ldb),
+                     colsB, size_type(offB_col), difference_type(incB_col), size_type(rowMajorB ? ldb : n), rowMajorB);
+    matrix_view matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id),
+                     size_type(m), size_type(offC_row), difference_type(incC_row), size_type(rowMajorC ? m : ldc),
+                     size_type(n), size_type(offC_col), difference_type(incC_col), size_type(rowMajorC ? ldc : n), rowMajorC);
+
+    detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC);
+    return ViennaCLSuccess;
+  }
+}
+
+
+// Single-precision (float) GEMM entry point for operands held in OpenCL buffers (cl_mem).
+// Passes every argument through unchanged to detail::ViennaCLOpenCLgemm_impl<float>()
+// and returns the status reported by that call.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemm(ViennaCLBackend backend,
+                                                              ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                                              ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                                              ViennaCLOrder orderC,
+                                                              ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                                              float alpha,
+                                                              cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                              cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                                              float beta,
+                                                              cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status =
+      detail::ViennaCLOpenCLgemm_impl<float>(backend, orderA, transA, orderB, transB, orderC,
+                                             m, n, k, alpha,
+                                             A, offA_row, offA_col, incA_row, incA_col, lda,
+                                             B, offB_row, offB_col, incB_row, incB_col, ldb,
+                                             beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+// Double-precision (double) GEMM entry point for operands held in OpenCL buffers (cl_mem).
+// Passes every argument through unchanged to detail::ViennaCLOpenCLgemm_impl<double>()
+// and returns the status reported by that call.
+VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemm(ViennaCLBackend backend,
+                                                              ViennaCLOrder orderA, ViennaCLTranspose transA,
+                                                              ViennaCLOrder orderB, ViennaCLTranspose transB,
+                                                              ViennaCLOrder orderC,
+                                                              ViennaCLInt m, ViennaCLInt n, ViennaCLInt k,
+                                                              double alpha,
+                                                              cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
+                                                              cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb,
+                                                              double beta,
+                                                              cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc)
+{
+  const ViennaCLStatus status =
+      detail::ViennaCLOpenCLgemm_impl<double>(backend, orderA, transA, orderB, transB, orderC,
+                                              m, n, k, alpha,
+                                              A, offA_row, offA_col, incA_row, incA_col, lda,
+                                              B, offB_row, offB_col, incB_row, incB_col, ldb,
+                                              beta, C, offC_row, offC_col, incC_row, incC_col, ldc);
+  return status;
+}
+
+
+#endif