You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2016/06/10 16:52:20 UTC

[15/51] [partial] mahout git commit: Revert "(nojira) add native-viennaCL module to codebase. closes apache/mahout#241"

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp
deleted file mode 100644
index c9dec88..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp
+++ /dev/null
@@ -1,1303 +0,0 @@
-#ifndef VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_
-#define VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/matrix_operations.hpp
-    @brief Implementations of dense matrix related operations including matrix-vector products.
-*/
-
-#include "viennacl/forwards.h"
-#include "viennacl/scalar.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/vector_proxy.hpp"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/meta/enable_if.hpp"
-#include "viennacl/meta/predicate.hpp"
-#include "viennacl/meta/result_of.hpp"
-#include "viennacl/traits/size.hpp"
-#include "viennacl/traits/start.hpp"
-#include "viennacl/traits/handle.hpp"
-#include "viennacl/traits/stride.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/linalg/host_based/matrix_operations.hpp"
-
-#ifdef VIENNACL_WITH_OPENCL
-  #include "viennacl/linalg/opencl/matrix_operations.hpp"
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-  #include "viennacl/linalg/cuda/matrix_operations.hpp"
-#endif
-
-namespace viennacl
-{
-  namespace linalg
-  {
-
-    template<typename DestNumericT, typename SrcNumericT>
-    void convert(matrix_base<DestNumericT> & dest, matrix_base<SrcNumericT> const & src)
-    {
-      assert(viennacl::traits::size1(dest) == viennacl::traits::size1(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size1(m1) != size1(m2)"));
-      assert(viennacl::traits::size2(dest) == viennacl::traits::size2(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size2(m1) != size2(m2)"));
-
-      switch (viennacl::traits::handle(dest).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::convert(dest, src);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::convert(dest, src);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::convert(dest, src);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-    template<typename NumericT,
-              typename SizeT, typename DistanceT>
-    void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,const matrix_base<NumericT, SizeT, DistanceT>, op_trans> & proxy,
-              matrix_base<NumericT> & temp_trans)
-    {
-      switch (viennacl::traits::handle(proxy).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::trans(proxy, temp_trans);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::trans(proxy,temp_trans);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::trans(proxy,temp_trans);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-    template<typename NumericT,
-              typename ScalarType1>
-    void am(matrix_base<NumericT> & mat1,
-            matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
-    {
-      switch (viennacl::traits::handle(mat1).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-    template<typename NumericT,
-              typename ScalarType1, typename ScalarType2>
-    void ambm(matrix_base<NumericT> & mat1,
-              matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
-              matrix_base<NumericT> const & mat3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
-    {
-      switch (viennacl::traits::handle(mat1).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::ambm(mat1,
-                                             mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
-                                             mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::ambm(mat1,
-                                         mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
-                                         mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::ambm(mat1,
-                                       mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
-                                       mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-    template<typename NumericT,
-              typename ScalarType1, typename ScalarType2>
-    void ambm_m(matrix_base<NumericT> & mat1,
-                matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
-                matrix_base<NumericT> const & mat3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
-    {
-      switch (viennacl::traits::handle(mat1).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::ambm_m(mat1,
-                                               mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
-                                               mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::ambm_m(mat1,
-                                           mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
-                                           mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::ambm_m(mat1,
-                                         mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
-                                         mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-    template<typename NumericT>
-    void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
-    {
-      switch (viennacl::traits::handle(mat).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::matrix_assign(mat, s, clear);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::matrix_assign(mat, s, clear);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::matrix_assign(mat, s, clear);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-    template<typename NumericT>
-    void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s)
-    {
-      switch (viennacl::traits::handle(mat).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::matrix_diagonal_assign(mat, s);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::matrix_diagonal_assign(mat, s);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::matrix_diagonal_assign(mat, s);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-    /** @brief Dispatcher interface for A = diag(v, k) */
-    template<typename NumericT>
-    void matrix_diag_from_vector(const vector_base<NumericT> & v, int k, matrix_base<NumericT> & A)
-    {
-      switch (viennacl::traits::handle(v).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::matrix_diag_from_vector(v, k, A);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::matrix_diag_from_vector(v, k, A);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::matrix_diag_from_vector(v, k, A);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-    /** @brief Dispatcher interface for v = diag(A, k) */
-    template<typename NumericT>
-    void matrix_diag_to_vector(const matrix_base<NumericT> & A, int k, vector_base<NumericT> & v)
-    {
-      switch (viennacl::traits::handle(A).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::matrix_diag_to_vector(A, k, v);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::matrix_diag_to_vector(A, k, v);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::matrix_diag_to_vector(A, k, v);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-    template<typename NumericT>
-    void matrix_row(const matrix_base<NumericT> & A, unsigned int i, vector_base<NumericT> & v)
-    {
-      switch (viennacl::traits::handle(A).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::matrix_row(A, i, v);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::matrix_row(A, i, v);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::matrix_row(A, i, v);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-    template<typename NumericT>
-    void matrix_column(const matrix_base<NumericT> & A, unsigned int j, vector_base<NumericT> & v)
-    {
-      switch (viennacl::traits::handle(A).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::matrix_column(A, j, v);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::matrix_column(A, j, v);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::matrix_column(A, j, v);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-    /** @brief Computes the Frobenius norm of a matrix - dispatcher interface
-    *
-    * @param A      The matrix
-    * @param result The result scalar
-    *
-    * Note that if A is strided or off-set, then a copy will be created.
-    */
-    template<typename T>
-    void norm_frobenius_impl(matrix_base<T> const & A,
-                             scalar<T> & result)
-    {
-      typedef typename matrix_base<T>::handle_type  HandleType;
-
-      if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) {
-        if (A.row_major()) {
-          viennacl::matrix<T, viennacl::row_major> temp_A(A);
-          viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
-          norm_2_impl(temp, result);
-        } else {
-          viennacl::matrix<T, viennacl::column_major> temp_A(A);
-          viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
-          norm_2_impl(temp, result);
-        }
-      } else {
-        viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1);
-        norm_2_impl(temp, result);
-      }
-
-    }
-
-    /** @brief Computes the Frobenius norm of a vector with final reduction on the CPU
-    *
-    * @param A      The matrix
-    * @param result The result scalar
-    *
-    * Note that if A is strided or off-set, then a copy will be created.
-    */
-    template<typename T>
-    void norm_frobenius_cpu(matrix_base<T> const & A,
-                            T & result)
-    {
-      typedef typename matrix_base<T>::handle_type  HandleType;
-
-      if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) {
-        if (A.row_major()) {
-          viennacl::matrix<T, viennacl::row_major> temp_A(A);
-          viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
-          norm_2_cpu(temp, result);
-        } else {
-          viennacl::matrix<T, viennacl::column_major> temp_A(A);
-          viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
-          norm_2_cpu(temp, result);
-        }
-      } else {
-        viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1);
-        norm_2_cpu(temp, result);
-      }
-
-    }
-
-    //
-    /////////////////////////   matrix-vector products /////////////////////////////////
-    //
-
-
-
-    // A * x
-
-    /** @brief Carries out matrix-vector multiplication
-    *
-    * Implementation of the convenience expression result = prod(mat, vec);
-    *
-    * @param mat    The matrix
-    * @param vec    The vector
-    * @param result The result vector
-    */
-    template<typename NumericT>
-    void prod_impl(const matrix_base<NumericT> & mat,
-                   const vector_base<NumericT> & vec,
-                         vector_base<NumericT> & result)
-    {
-      assert( (viennacl::traits::size1(mat) == viennacl::traits::size(result)) && bool("Size check failed at v1 = prod(A, v2): size1(A) != size(v1)"));
-      assert( (viennacl::traits::size2(mat) == viennacl::traits::size(vec))    && bool("Size check failed at v1 = prod(A, v2): size2(A) != size(v2)"));
-
-      switch (viennacl::traits::handle(mat).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::prod_impl(mat, false, vec, result);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::prod_impl(mat, false, vec, result);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::prod_impl(mat, false, vec, result);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-    // trans(A) * x
-
-    /** @brief Carries out matrix-vector multiplication with a transposed matrix
-    *
-    * Implementation of the convenience expression result = trans(mat) * vec;
-    *
-    * @param mat_trans  The transposed matrix proxy
-    * @param vec        The vector
-    * @param result     The result vector
-    */
-    template<typename NumericT>
-    void prod_impl(const matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & mat_trans,
-                   const vector_base<NumericT> & vec,
-                         vector_base<NumericT> & result)
-    {
-      assert( (viennacl::traits::size1(mat_trans.lhs()) == viennacl::traits::size(vec))    && bool("Size check failed at v1 = trans(A) * v2: size1(A) != size(v2)"));
-      assert( (viennacl::traits::size2(mat_trans.lhs()) == viennacl::traits::size(result)) && bool("Size check failed at v1 = trans(A) * v2: size2(A) != size(v1)"));
-
-      switch (viennacl::traits::handle(mat_trans.lhs()).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::prod_impl(mat_trans.lhs(), true, vec, result);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::prod_impl(mat_trans.lhs(), true, vec, result);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::prod_impl(mat_trans.lhs(), true, vec, result);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-    //
-    /////////////////////////   matrix-matrix products /////////////////////////////////
-    //
-
-    /** @brief Carries out matrix-matrix multiplication
-    *
-    * Implementation of C = prod(A, B);
-    *
-    */
-    template<typename NumericT, typename ScalarType >
-    void prod_impl(const matrix_base<NumericT> & A,
-                   const matrix_base<NumericT> & B,
-                         matrix_base<NumericT> & C,
-                   ScalarType alpha,
-                   ScalarType beta)
-    {
-      assert( (viennacl::traits::size1(A) == viennacl::traits::size1(C)) && bool("Size check failed at C = prod(A, B): size1(A) != size1(C)"));
-      assert( (viennacl::traits::size2(A) == viennacl::traits::size1(B)) && bool("Size check failed at C = prod(A, B): size2(A) != size1(B)"));
-      assert( (viennacl::traits::size2(B) == viennacl::traits::size2(C)) && bool("Size check failed at C = prod(A, B): size2(B) != size2(C)"));
-
-
-      switch (viennacl::traits::handle(A).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::prod_impl(A, false, B, false, C, alpha, beta);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::prod_impl(A, false, B, false, C, alpha, beta);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::prod_impl(A, false, B, false, C, alpha, beta);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-
-    /** @brief Carries out matrix-matrix multiplication
-    *
-    * Implementation of C = prod(trans(A), B);
-    *
-    */
-    template<typename NumericT, typename ScalarType >
-    void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>,
-                                                      const matrix_base<NumericT>,
-                                                      op_trans> & A,
-                   const matrix_base<NumericT> & B,
-                         matrix_base<NumericT> & C,
-                   ScalarType alpha,
-                   ScalarType beta)
-    {
-      assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C) && bool("Size check failed at C = prod(trans(A), B): size2(A) != size1(C)"));
-      assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size1(B) && bool("Size check failed at C = prod(trans(A), B): size1(A) != size1(B)"));
-      assert(viennacl::traits::size2(B)       == viennacl::traits::size2(C) && bool("Size check failed at C = prod(trans(A), B): size2(B) != size2(C)"));
-
-      switch (viennacl::traits::handle(A.lhs()).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::prod_impl(A.lhs(), true, B, false, C, alpha, beta);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::prod_impl(A.lhs(), true, B, false, C, alpha, beta);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::prod_impl(A.lhs(), true, B, false, C, alpha, beta);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-
-
-    /** @brief Carries out matrix-matrix multiplication
-    *
-    * Implementation of C = prod(A, trans(B));
-    *
-    */
-    template<typename NumericT, typename ScalarType >
-    void prod_impl(const matrix_base<NumericT> & A,
-                   const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B,
-                         matrix_base<NumericT> & C,
-                   ScalarType alpha,
-                   ScalarType beta)
-    {
-      assert(viennacl::traits::size1(A)       == viennacl::traits::size1(C)       && bool("Size check failed at C = prod(A, trans(B)): size1(A) != size1(C)"));
-      assert(viennacl::traits::size2(A)       == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(A, trans(B)): size2(A) != size2(B)"));
-      assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C)       && bool("Size check failed at C = prod(A, trans(B)): size1(B) != size2(C)"));
-
-      switch (viennacl::traits::handle(A).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::prod_impl(A, false, B.lhs(), true, C, alpha, beta);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::prod_impl(A, false, B.lhs(), true, C, alpha, beta);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::prod_impl(A, false, B.lhs(), true, C, alpha, beta);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-
-    /** @brief Carries out matrix-matrix multiplication
-    *
-    * Implementation of C = prod(trans(A), trans(B));
-    *
-    */
-    template<typename NumericT, typename ScalarType >
-    void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & A,
-                   const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B,
-                   matrix_base<NumericT> & C,
-                   ScalarType alpha,
-                   ScalarType beta)
-    {
-      assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C)       && bool("Size check failed at C = prod(trans(A), trans(B)): size2(A) != size1(C)"));
-      assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(trans(A), trans(B)): size1(A) != size2(B)"));
-      assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C)       && bool("Size check failed at C = prod(trans(A), trans(B)): size1(B) != size2(C)"));
-
-      switch (viennacl::traits::handle(A.lhs()).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-    ///////////////////////// summation operations /////////////
-
-    template<typename NumericT>
-    void row_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result)
-    {
-      viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size2(), NumericT(1), viennacl::traits::context(A));
-      viennacl::linalg::prod_impl(A, all_ones, result);
-    }
-
-    template<typename NumericT>
-    void column_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result)
-    {
-      viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size1(), NumericT(1), viennacl::traits::context(A));
-      viennacl::linalg::prod_impl(matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>(A, A), all_ones, result);
-    }
-
-    ///////////////////////// Elementwise operations /////////////
-
-
-
-    /** @brief Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syntax). Don't use this function directly, use element_prod() and element_div().
-    *
-    * @param A      The result matrix (or -range, or -slice)
-    * @param proxy  The proxy object holding B, C, and the operation
-    */
-    template<typename T, typename OP>
-    void element_op(matrix_base<T> & A,
-                    matrix_expression<const matrix_base<T>, const matrix_base<T>, OP> const & proxy)
-    {
-      assert( (viennacl::traits::size1(A) == viennacl::traits::size1(proxy)) && bool("Size check failed at A = element_op(B): size1(A) != size1(B)"));
-      assert( (viennacl::traits::size2(A) == viennacl::traits::size2(proxy)) && bool("Size check failed at A = element_op(B): size2(A) != size2(B)"));
-
-      switch (viennacl::traits::handle(A).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::element_op(A, proxy);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::element_op(A, proxy);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::element_op(A, proxy);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-
-#define VIENNACL_MAKE_BINARY_OP(OPNAME)\
-    template<typename T>\
-    viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >\
-    element_##OPNAME(matrix_base<T> const & A, matrix_base<T> const & B)\
-    {\
-      return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >(A, B);\
-    }\
-\
-    template<typename M1, typename M2, typename OP, typename T>\
-    viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\
-                                const matrix_base<T>,\
-                                op_element_binary<op_##OPNAME> >\
-    element_##OPNAME(matrix_expression<const M1, const M2, OP> const & proxy, matrix_base<T> const & B)\
-    {\
-      return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\
-                                         const matrix_base<T>,\
-                                         op_element_binary<op_##OPNAME> >(proxy, B);\
-    }\
-\
-    template<typename T, typename M2, typename M3, typename OP>\
-    viennacl::matrix_expression<const matrix_base<T>,\
-                                const matrix_expression<const M2, const M3, OP>,\
-                                op_element_binary<op_##OPNAME> >\
-    element_##OPNAME(matrix_base<T> const & A, matrix_expression<const M2, const M3, OP> const & proxy)\
-    {\
-      return viennacl::matrix_expression<const matrix_base<T>,\
-                                         const matrix_expression<const M2, const M3, OP>,\
-                                         op_element_binary<op_##OPNAME> >(A, proxy);\
-    }\
-\
-    template<typename M1, typename M2, typename OP1,\
-              typename M3, typename M4, typename OP2>\
-    viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\
-                                const matrix_expression<const M3, const M4, OP2>,\
-                                op_element_binary<op_##OPNAME> >\
-    element_##OPNAME(matrix_expression<const M1, const M2, OP1> const & proxy1,\
-                 matrix_expression<const M3, const M4, OP2> const & proxy2)\
-    {\
-      return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\
-                                         const matrix_expression<const M3, const M4, OP2>,\
-                                         op_element_binary<op_##OPNAME> >(proxy1, proxy2);\
-    }
-
-    VIENNACL_MAKE_BINARY_OP(prod)
-    VIENNACL_MAKE_BINARY_OP(div)
-    VIENNACL_MAKE_BINARY_OP(pow)
-
-    VIENNACL_MAKE_BINARY_OP(eq)
-    VIENNACL_MAKE_BINARY_OP(neq)
-    VIENNACL_MAKE_BINARY_OP(greater)
-    VIENNACL_MAKE_BINARY_OP(less)
-    VIENNACL_MAKE_BINARY_OP(geq)
-    VIENNACL_MAKE_BINARY_OP(leq)
-
-#undef VIENNACL_GENERATE_BINARY_OP_OVERLOADS
-
-
-
-#define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \
-    template<typename T> \
-    viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> > \
-    element_##funcname(matrix_base<T> const & A) \
-    { \
-      return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> >(A, A); \
-    } \
-    template<typename LHS, typename RHS, typename OP> \
-    viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \
-                                const matrix_expression<const LHS, const RHS, OP>, \
-                                op_element_unary<op_##funcname> > \
-    element_##funcname(matrix_expression<const LHS, const RHS, OP> const & proxy) \
-    { \
-      return viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \
-                                         const matrix_expression<const LHS, const RHS, OP>, \
-                                         op_element_unary<op_##funcname> >(proxy, proxy); \
-    } \
-
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(abs)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(acos)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(asin)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(atan)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(ceil)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(cos)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(cosh)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(exp)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(fabs)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(floor)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(log)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(log10)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(sin)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(sinh)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(sqrt)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(tan)
-    VIENNACL_MAKE_UNARY_ELEMENT_OP(tanh)
-
-#undef VIENNACL_MAKE_UNARY_ELEMENT_OP
-
-
-    //
-    /////////////////////////   miscellaneous operations /////////////////////////////////
-    //
-
-
-    /** @brief Returns a proxy class for the operation mat += vec1 * vec2^T, i.e. a rank 1 update
-    *
-    * @param vec1    The first vector
-    * @param vec2    The second vector
-    */
-    template<typename NumericT>
-    viennacl::matrix_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_prod>
-    outer_prod(const vector_base<NumericT> & vec1, const vector_base<NumericT> & vec2)
-    {
-      return viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>(vec1, vec2);
-    }
-
-
-    /** @brief The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update
-    *
-    * Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);
-    *
-    * @param mat1             The matrix to be updated
-    * @param alpha            The scaling factor (either a viennacl::scalar<>, float, or double)
-    * @param len_alpha        Length of the buffer for an eventual final reduction step (currently always '1')
-    * @param reciprocal_alpha Use 1/alpha instead of alpha
-    * @param flip_sign_alpha  Use -alpha instead of alpha
-    * @param vec1             The first vector
-    * @param vec2             The second vector
-    */
-    template<typename NumericT, typename S1>
-    void scaled_rank_1_update(matrix_base<NumericT> & mat1,
-                              S1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
-                              const vector_base<NumericT> & vec1,
-                              const vector_base<NumericT> & vec2)
-    {
-      switch (viennacl::traits::handle(mat1).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::scaled_rank_1_update(mat1,
-                                                             alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
-                                                             vec1, vec2);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::scaled_rank_1_update(mat1,
-                                                         alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
-                                                         vec1, vec2);
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::scaled_rank_1_update(mat1,
-                                                       alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
-                                                       vec1, vec2);
-          break;
-#endif
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-    }
-
-    /** @brief This function stores the diagonal and the superdiagonal of a matrix in two vectors.
-    *
-    *
-    * @param A     The matrix from which the vectors will be extracted of.
-    * @param dh    The vector in which the diagonal of the matrix will be stored in.
-    * @param sh    The vector in which the superdiagonal of the matrix will be stored in.
-    */
-
-    template <typename NumericT, typename VectorType>
-    void bidiag_pack(matrix_base<NumericT> & A,
-                     VectorType & dh,
-                     VectorType & sh
-                    )
-    {
-      switch (viennacl::traits::handle(A).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::bidiag_pack(A, dh, sh);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::bidiag_pack(A, dh, sh);
-          break;
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::bidiag_pack(A, dh, sh);
-          break;
-#endif
-
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-
-
-    }
-    /** @brief This function copies a row or a column from a matrix to a vector.
-    *
-    *
-    * @param A          The matrix where to copy from.
-    * @param V          The vector to fill with data.
-    * @param row_start  The number of the first row to copy.
-    * @param col_start  The number of the first column to copy.
-    * @param copy_col   Set to TRUE to copy a column, FALSE to copy a row.
-    */
-
-    template <typename SCALARTYPE>
-    void copy_vec(matrix_base<SCALARTYPE>& A,
-                  vector_base<SCALARTYPE>& V,
-                  vcl_size_t row_start,
-                  vcl_size_t col_start,
-                  bool copy_col
-    )
-    {
-      switch (viennacl::traits::handle(A).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::copy_vec(A, V, row_start, col_start, copy_col);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::copy_vec(A, V, row_start, col_start, copy_col);
-          break;
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-        case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::copy_vec(A, V, row_start, col_start, copy_col);
-          break;
-#endif
-
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-      }
-
-    }
-
-    /** @brief This function applies a householder transformation to a matrix. A <- P * A with a householder reflection P
-    *
-    * @param A       The matrix to be updated.
-    * @param D       The normalized householder vector.
-    * @param start   The repetition counter.
-    */
-  template <typename NumericT>
-  void house_update_A_left(matrix_base<NumericT> & A,
-                           vector_base<NumericT>    & D,
-                           vcl_size_t start)
-  {
-    switch (viennacl::traits::handle(A).get_active_handle_id())
-    {
-      case viennacl::MAIN_MEMORY:
-        viennacl::linalg::host_based::house_update_A_left(A, D, start);
-        break;
-#ifdef VIENNACL_WITH_OPENCL
-      case viennacl::OPENCL_MEMORY:
-        viennacl::linalg::opencl::house_update_A_left(A, D, start);
-        break;
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-      case viennacl::CUDA_MEMORY:
-        viennacl::linalg::cuda::house_update_A_left(A, D, start);
-        break;
-#endif
-
-      case viennacl::MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("not implemented");
-    }
-  }
-
-
-  /** @brief This function applies a householder transformation to a matrix: A <- A * P with a householder reflection P
-  *
-  *
-  * @param A        The matrix to be updated.
-  * @param D        The normalized householder vector.
-  */
-
-  template <typename NumericT>
-  void house_update_A_right(matrix_base<NumericT>& A,
-                            vector_base<NumericT>   & D)
-  {
-    switch (viennacl::traits::handle(A).get_active_handle_id())
-    {
-      case viennacl::MAIN_MEMORY:
-        viennacl::linalg::host_based::house_update_A_right(A, D);
-        break;
-#ifdef VIENNACL_WITH_OPENCL
-      case viennacl::OPENCL_MEMORY:
-        viennacl::linalg::opencl::house_update_A_right(A, D);
-        break;
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-      case viennacl::CUDA_MEMORY:
-        viennacl::linalg::cuda::house_update_A_right(A, D);
-        break;
-#endif
-
-      case viennacl::MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("not implemented");
-    }
-  }
-
-  /** @brief This function updates the matrix Q, which is needed for the computation of the eigenvectors.
-  *
-  * @param Q        The matrix to be updated.
-  * @param D        The householder vector.
-  * @param A_size1  size1 of matrix A
-  */
-
-  template <typename NumericT>
-  void house_update_QL(matrix_base<NumericT> & Q,
-                       vector_base<NumericT>    & D,
-                       vcl_size_t A_size1)
-  {
-    switch (viennacl::traits::handle(Q).get_active_handle_id())
-    {
-      case viennacl::MAIN_MEMORY:
-        viennacl::linalg::host_based::house_update_QL(Q, D, A_size1);
-        break;
-#ifdef VIENNACL_WITH_OPENCL
-      case viennacl::OPENCL_MEMORY:
-        viennacl::linalg::opencl::house_update_QL(Q, D, A_size1);
-        break;
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-      case viennacl::CUDA_MEMORY:
-        viennacl::linalg::cuda::house_update_QL(Q, D, A_size1);
-        break;
-#endif
-
-      case viennacl::MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("not implemented");
-    }
-  }
-
-
-  /** @brief This function updates the matrix Q. It is part of the tql2 algorithm.
-  *
-  *
-  * @param Q       The matrix to be updated.
-  * @param tmp1    Vector with data from the tql2 algorithm.
-  * @param tmp2    Vector with data from the tql2 algorithm.
-  * @param l       Data from the tql2 algorithm.
-  * @param m       Data from the tql2 algorithm.
-  */
-  template<typename NumericT>
-  void givens_next(matrix_base<NumericT> & Q,
-                   vector_base<NumericT> & tmp1,
-                   vector_base<NumericT> & tmp2,
-                   int l,
-                   int m
-                )
-  {
-    switch (viennacl::traits::handle(Q).get_active_handle_id())
-    {
-      case viennacl::MAIN_MEMORY:
-        viennacl::linalg::host_based::givens_next(Q, tmp1, tmp2, l, m);
-        break;
-#ifdef VIENNACL_WITH_OPENCL
-      case viennacl::OPENCL_MEMORY:
-        viennacl::linalg::opencl::givens_next(Q, tmp1, tmp2, l, m);
-        break;
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-      case viennacl::CUDA_MEMORY:
-        viennacl::linalg::cuda::givens_next(Q, tmp1, tmp2, l, m);
-        break;
-#endif
-
-      case viennacl::MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("not implemented");
-    }
-  }
-
-  } //namespace linalg
-
-
-
-
-  //
-  /////////////////////////  Operator overloads /////////////////////////////////
-  //
-
-
-  //v += A * x
-  /** @brief Implementation of the operation v1 += A * v2, where A is a matrix
-  *
-  * @param v1     The result vector v1 where A * v2 is added to
-  * @param proxy  An expression template proxy class.
-  */
-  template<typename NumericT>
-  vector<NumericT>
-  operator+=(vector_base<NumericT> & v1,
-             const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy)
-  {
-    assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 += A * v2: size1(A) != size(v1)"));
-
-    vector<NumericT> result(viennacl::traits::size1(proxy.lhs()));
-    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-    v1 += result;
-    return v1;
-  }
-
-  /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix
-  *
-  * @param v1     The result vector v1 where A * v2 is subtracted from
-  * @param proxy  An expression template proxy class.
-  */
-  template<typename NumericT>
-  vector<NumericT>
-  operator-=(vector_base<NumericT> & v1,
-             const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy)
-  {
-    assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 -= A * v2: size1(A) != size(v1)"));
-
-    vector<NumericT> result(viennacl::traits::size1(proxy.lhs()));
-    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-    v1 -= result;
-    return v1;
-  }
-
-
-
-
-
-  //free functions:
-  /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix
-  *
-  * @param v1     The addend vector.
-  * @param proxy  An expression template proxy class.
-  */
-  template<typename NumericT>
-  viennacl::vector<NumericT>
-  operator+(const vector_base<NumericT> & v1,
-            const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
-  {
-    assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 + A * v2: size1(A) != size(v1)"));
-
-    vector<NumericT> result(viennacl::traits::size(v1));
-    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-    result += v1;
-    return result;
-  }
-
-  /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix
-  *
-  * @param v1     The addend vector.
-  * @param proxy  An expression template proxy class.
-  */
-  template<typename NumericT>
-  viennacl::vector<NumericT>
-  operator-(const vector_base<NumericT> & v1,
-            const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
-  {
-    assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 - A * v2: size1(A) != size(v1)"));
-
-    vector<NumericT> result(viennacl::traits::size(v1));
-    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-    result = v1 - result;
-    return result;
-  }
-
-
-  ////////// transposed_matrix_proxy
-
-
-  //v += A^T * x
-  /** @brief Implementation of the operation v1 += A * v2, where A is a matrix
-  *
-  * @param v1     The addend vector where the result is written to.
-  * @param proxy  An expression template proxy class.
-  */
-  template<typename NumericT>
-  vector<NumericT>
-  operator+=(vector_base<NumericT> & v1,
-             const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
-                                                              const vector_base<NumericT>,
-                                                              op_prod> & proxy)
-  {
-    assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)"));
-
-    vector<NumericT> result(viennacl::traits::size2(proxy.lhs()));
-    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-    v1 += result;
-    return v1;
-  }
-
-  //v -= A^T * x
-  /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix
-  *
-  * @param v1     The addend vector where the result is written to.
-  * @param proxy  An expression template proxy class.
-  */
-  template<typename NumericT>
-  vector<NumericT>
-  operator-=(vector_base<NumericT> & v1,
-             const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
-                                                              const vector_base<NumericT>,
-                                                              op_prod> & proxy)
-  {
-    assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)"));
-
-    vector<NumericT> result(viennacl::traits::size2(proxy.lhs()));
-    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-    v1 -= result;
-    return v1;
-  }
-
-
-  //free functions:
-  /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix
-  *
-  * @param v1     The addend vector.
-  * @param proxy  An expression template proxy class.
-  */
-  template<typename NumericT>
-  vector<NumericT>
-  operator+(const vector_base<NumericT> & v1,
-            const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
-                                     const vector_base<NumericT>,
-                                     op_prod> & proxy)
-  {
-    assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 + trans(A) * v2: size2(A) != size(v1)"));
-
-    vector<NumericT> result(viennacl::traits::size(v1));
-    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-    result += v1;
-    return result;
-  }
-
-  /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix
-  *
-  * @param v1     The addend vector.
-  * @param proxy  An expression template proxy class.
-  */
-  template<typename NumericT>
-  vector<NumericT>
-  operator-(const vector_base<NumericT> & v1,
-            const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
-                                     const vector_base<NumericT>,
-                                     op_prod> & proxy)
-  {
-    assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 - trans(A) * v2: size2(A) != size(v1)"));
-
-    vector<NumericT> result(viennacl::traits::size(v1));
-    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-    result = v1 - result;
-    return result;
-  }
-
-
-} //namespace viennacl
-
-
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp
deleted file mode 100644
index 9269598..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp
+++ /dev/null
@@ -1,152 +0,0 @@
-#ifndef VIENNACL_LINALG_MAXMIN_HPP_
-#define VIENNACL_LINALG_MAXMIN_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file norm_inf.hpp
-    @brief Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations.
-*/
-
-#include <cmath>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/meta/enable_if.hpp"
-#include "viennacl/meta/tag_of.hpp"
-#include "viennacl/meta/result_of.hpp"
-
-namespace viennacl
-{
-  //
-  // generic norm_inf function
-  //   uses tag dispatch to identify which algorithm
-  //   should be called
-  //
-  namespace linalg
-  {
-
-
-    // ----------------------------------------------------
-    // STL
-    //
-    template< typename NumericT >
-    NumericT max(std::vector<NumericT> const & v1)
-    {
-      //std::cout << "stl .. " << std::endl;
-      NumericT result = v1[0];
-      for (vcl_size_t i=1; i<v1.size(); ++i)
-      {
-        if (v1[i] > result)
-          result = v1[i];
-      }
-
-      return result;
-    }
-
-    // ----------------------------------------------------
-    // VIENNACL
-    //
-    template< typename ScalarType>
-    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                 const viennacl::vector_base<ScalarType>,
-                                 viennacl::op_max >
-    max(viennacl::vector_base<ScalarType> const & v1)
-    {
-       //std::cout << "viennacl .. " << std::endl;
-      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                          const viennacl::vector_base<ScalarType>,
-                                          viennacl::op_max >(v1, v1);
-    }
-
-    // with vector expression:
-    template<typename LHS, typename RHS, typename OP>
-    viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                viennacl::op_max>
-    max(viennacl::vector_expression<const LHS, const RHS, OP> const & vector)
-    {
-      return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                          const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                          viennacl::op_max >(vector, vector);
-    }
-
-    // ----------------------------------------------------
-    // STL
-    //
-    template< typename NumericT >
-    NumericT min(std::vector<NumericT> const & v1)
-    {
-      //std::cout << "stl .. " << std::endl;
-      NumericT result = v1[0];
-      for (vcl_size_t i=1; i<v1.size(); ++i)
-      {
-        if (v1[i] < result)
-          result = v1[i];
-      }
-
-      return result;
-    }
-
-    // ----------------------------------------------------
-    // VIENNACL
-    //
-    template< typename ScalarType>
-    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                 const viennacl::vector_base<ScalarType>,
-                                 viennacl::op_min >
-    min(viennacl::vector_base<ScalarType> const & v1)
-    {
-       //std::cout << "viennacl .. " << std::endl;
-      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                          const viennacl::vector_base<ScalarType>,
-                                          viennacl::op_min >(v1, v1);
-    }
-
-    template< typename ScalarType>
-    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                 const viennacl::vector_base<ScalarType>,
-                                 viennacl::op_min >
-    min(viennacl::vector<ScalarType> const & v1)
-    {
-       //std::cout << "viennacl .. " << std::endl;
-      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                          const viennacl::vector_base<ScalarType>,
-                                          viennacl::op_min >(v1, v1);
-    }
-
-    // with vector expression:
-    template<typename LHS, typename RHS, typename OP>
-    viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                viennacl::op_min>
-    min(viennacl::vector_expression<const LHS, const RHS, OP> const & vector)
-    {
-      return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                          const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                          viennacl::op_min >(vector, vector);
-    }
-
-
-
-  } // end namespace linalg
-} // end namespace viennacl
-#endif
-
-
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp
deleted file mode 100644
index 208573f..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef VIENNACL_LINALG_MISC_OPERATIONS_HPP_
-#define VIENNACL_LINALG_MISC_OPERATIONS_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/misc_operations.hpp
-    @brief Implementations of miscellaneous operations
-*/
-
-#include "viennacl/forwards.h"
-#include "viennacl/scalar.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/host_based/misc_operations.hpp"
-
-#ifdef VIENNACL_WITH_OPENCL
-  #include "viennacl/linalg/opencl/misc_operations.hpp"
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-  #include "viennacl/linalg/cuda/misc_operations.hpp"
-#endif
-
-namespace viennacl
-{
-  namespace linalg
-  {
-
-    namespace detail
-    {
-
-      template<typename ScalarType>
-      void level_scheduling_substitute(vector<ScalarType> & vec,
-                                  viennacl::backend::mem_handle const & row_index_array,
-                                  viennacl::backend::mem_handle const & row_buffer,
-                                  viennacl::backend::mem_handle const & col_buffer,
-                                  viennacl::backend::mem_handle const & element_buffer,
-                                  vcl_size_t num_rows
-                                  )
-      {
-        assert( viennacl::traits::handle(vec).get_active_handle_id() == row_index_array.get_active_handle_id() && bool("Incompatible memory domains"));
-        assert( viennacl::traits::handle(vec).get_active_handle_id() ==      row_buffer.get_active_handle_id() && bool("Incompatible memory domains"));
-        assert( viennacl::traits::handle(vec).get_active_handle_id() ==      col_buffer.get_active_handle_id() && bool("Incompatible memory domains"));
-        assert( viennacl::traits::handle(vec).get_active_handle_id() ==  element_buffer.get_active_handle_id() && bool("Incompatible memory domains"));
-
-        switch (viennacl::traits::handle(vec).get_active_handle_id())
-        {
-          case viennacl::MAIN_MEMORY:
-            viennacl::linalg::host_based::detail::level_scheduling_substitute(vec, row_index_array, row_buffer, col_buffer, element_buffer, num_rows);
-            break;
-#ifdef VIENNACL_WITH_OPENCL
-          case viennacl::OPENCL_MEMORY:
-            viennacl::linalg::opencl::detail::level_scheduling_substitute(vec, row_index_array, row_buffer, col_buffer, element_buffer, num_rows);
-            break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-          case viennacl::CUDA_MEMORY:
-            viennacl::linalg::cuda::detail::level_scheduling_substitute(vec, row_index_array, row_buffer, col_buffer, element_buffer, num_rows);
-            break;
-#endif
-          case viennacl::MEMORY_NOT_INITIALIZED:
-            throw memory_exception("not initialised!");
-          default:
-            throw memory_exception("not implemented");
-        }
-      }
-
-
-
-
-    } //namespace detail
-
-
-  } //namespace linalg
-} //namespace viennacl
-
-
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp
deleted file mode 100644
index 78254b3..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp
+++ /dev/null
@@ -1,199 +0,0 @@
-#ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
-#define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/mixed_precision_cg.hpp
-    @brief The conjugate gradient method using mixed precision is implemented here. Experimental.
-*/
-
-#include <vector>
-#include <map>
-#include <cmath>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/ilu.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/traits/clear.hpp"
-#include "viennacl/traits/size.hpp"
-#include "viennacl/meta/result_of.hpp"
-#include "viennacl/backend/memory.hpp"
-
-#include "viennacl/vector_proxy.hpp"
-
-namespace viennacl
-{
-  namespace linalg
-  {
-
-    /** @brief A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve() function
-    */
-    class mixed_precision_cg_tag
-    {
-      public:
-        /** @brief The constructor
-        *
-        * @param tol              Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||)
-        * @param max_iterations   The maximum number of iterations
-        * @param inner_tol        Inner tolerance for the low-precision iterations
-        */
-        mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f) : tol_(tol), iterations_(max_iterations), inner_tol_(inner_tol) {}
-
-        /** @brief Returns the relative tolerance */
-        double tolerance() const { return tol_; }
-        /** @brief Returns the relative tolerance */
-        float inner_tolerance() const { return inner_tol_; }
-        /** @brief Returns the maximum number of iterations */
-        unsigned int max_iterations() const { return iterations_; }
-
-        /** @brief Return the number of solver iterations: */
-        unsigned int iters() const { return iters_taken_; }
-        void iters(unsigned int i) const { iters_taken_ = i; }
-
-        /** @brief Returns the estimated relative error at the end of the solver run */
-        double error() const { return last_error_; }
-        /** @brief Sets the estimated relative error at the end of the solver run */
-        void error(double e) const { last_error_ = e; }
-
-
-      private:
-        double tol_;
-        unsigned int iterations_;
-        float inner_tol_;
-
-        //return values from solver
-        mutable unsigned int iters_taken_;
-        mutable double last_error_;
-    };
-
-
-    /** @brief Implementation of the conjugate gradient solver without preconditioner
-    *
-    * Following the algorithm in the book by Y. Saad "Iterative Methods for sparse linear systems"
-    *
-    * @param matrix     The system matrix
-    * @param rhs        The load vector
-    * @param tag        Solver configuration tag
-    * @return The result vector
-    */
-    template<typename MatrixType, typename VectorType>
-    VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag)
-    {
-      //typedef typename VectorType::value_type      ScalarType;
-      typedef typename viennacl::result_of::cpu_value_type<VectorType>::type    CPU_ScalarType;
-
-      //std::cout << "Starting CG" << std::endl;
-      vcl_size_t problem_size = viennacl::traits::size(rhs);
-      VectorType result(rhs);
-      viennacl::traits::clear(result);
-
-      VectorType residual = rhs;
-
-      CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs);
-      CPU_ScalarType new_ip_rr = 0;
-      CPU_ScalarType norm_rhs_squared = ip_rr;
-
-      if (norm_rhs_squared <= 0) //solution is zero if RHS norm is zero
-        return result;
-
-      viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs));
-      viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs));
-      viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs));
-      viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs));
-      float inner_ip_rr = static_cast<float>(ip_rr);
-      float new_inner_ip_rr = 0;
-      float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr);
-      float alpha;
-      float beta;
-
-      // transfer rhs to single precision:
-      p_low_precision = rhs;
-      residual_low_precision = p_low_precision;
-
-      // transfer matrix to single precision:
-      viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs));
-      viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, matrix_low_precision.handle1().raw_size() );
-      viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, matrix_low_precision.handle2().raw_size() );
-
-      viennacl::vector_base<CPU_ScalarType> matrix_elements_high_precision(const_cast<viennacl::backend::mem_handle &>(matrix.handle()), matrix.nnz(), 0, 1);
-      viennacl::vector_base<float>          matrix_elements_low_precision(matrix_low_precision.handle(), matrix.nnz(), 0, 1);
-      matrix_elements_low_precision = matrix_elements_high_precision;
-      matrix_low_precision.generate_row_block_information();
-
-      for (unsigned int i = 0; i < tag.max_iterations(); ++i)
-      {
-        tag.iters(i+1);
-
-        // lower precision 'inner iteration'
-        tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision);
-
-        alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision);
-        result_low_precision += alpha * p_low_precision;
-        residual_low_precision -= alpha * tmp_low_precision;
-
-        new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision);
-
-        beta = new_inner_ip_rr / inner_ip_rr;
-        inner_ip_rr = new_inner_ip_rr;
-
-        p_low_precision = residual_low_precision + beta * p_low_precision;
-
-        //
-        // If enough progress has been achieved, update current residual with high precision evaluation
-        // This is effectively a restart of the CG method
-        //
-        if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1)
-        {
-          residual = result_low_precision; // reusing residual vector as temporary buffer for conversion. Overwritten below anyway
-          result += residual;
-
-          // residual = b - Ax  (without introducing a temporary)
-          residual = viennacl::linalg::prod(matrix, result);
-          residual = rhs - residual;
-
-          new_ip_rr = viennacl::linalg::inner_prod(residual, residual);
-          if (new_ip_rr / norm_rhs_squared < tag.tolerance() *  tag.tolerance())//squared norms involved here
-            break;
-
-          p_low_precision = residual;
-
-          result_low_precision.clear();
-          residual_low_precision = p_low_precision;
-          initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr);
-          inner_ip_rr = static_cast<float>(new_ip_rr);
-        }
-      }
-
-      //store last error estimate:
-      tag.error(std::sqrt(new_ip_rr / norm_rhs_squared));
-
-      return result;
-    }
-
-    template<typename MatrixType, typename VectorType>
-    VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond)
-    {
-      return solve(matrix, rhs, tag);
-    }
-
-
-  }
-}
-
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp
deleted file mode 100644
index c962c8e..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef VIENNACL_LINALG_NMF_HPP
-#define VIENNACL_LINALG_NMF_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License:         MIT (X11), see file LICENSE in the base directory
- ============================================================================= */
-
-/** @file viennacl/linalg/nmf.hpp
- @brief Provides a nonnegative matrix factorization implementation.  Experimental.
-
-
- */
-
-#include "viennacl/vector.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/linalg/norm_frobenius.hpp"
-
-#include "viennacl/linalg/host_based/nmf_operations.hpp"
-
-#ifdef VIENNACL_WITH_OPENCL
-#include "viennacl/linalg/opencl/kernels/nmf.hpp"
-#include "viennacl/linalg/opencl/nmf_operations.hpp"
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-#include "viennacl/linalg/cuda/nmf_operations.hpp"
-#endif
-
-namespace viennacl
-{
-  namespace linalg
-  {
-
-    /** @brief The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized.
-     *
-     * @param V     Input matrix
-     * @param W     First factor
-     * @param H     Second factor
-     * @param conf  A configuration object holding tolerances and the like
-     */
-    template<typename ScalarType>
-    void nmf(viennacl::matrix_base<ScalarType> const & V, viennacl::matrix_base<ScalarType> & W,
-        viennacl::matrix_base<ScalarType> & H, viennacl::linalg::nmf_config const & conf)
-    {
-      assert(V.size1() == W.size1() && V.size2() == H.size2() && bool("Dimensions of W and H don't allow for V = W * H"));
-      assert(W.size2() == H.size1() && bool("Dimensions of W and H don't match, prod(W, H) impossible"));
-
-      switch (viennacl::traits::handle(V).get_active_handle_id())
-      {
-        case viennacl::MAIN_MEMORY:
-          viennacl::linalg::host_based::nmf(V, W, H, conf);
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-          case viennacl::OPENCL_MEMORY:
-          viennacl::linalg::opencl::nmf(V,W,H,conf);
-          break;
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-          case viennacl::CUDA_MEMORY:
-          viennacl::linalg::cuda::nmf(V,W,H,conf);
-          break;
-#endif
-
-        case viennacl::MEMORY_NOT_INITIALIZED:
-          throw memory_exception("not initialised!");
-        default:
-          throw memory_exception("not implemented");
-
-      }
-
-    }
-  }
-}
-
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp
deleted file mode 100644
index e16238b..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#ifndef VIENNACL_LINALG_NORM_1_HPP_
-#define VIENNACL_LINALG_NORM_1_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file norm_1.hpp
-    @brief Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations.
-*/
-
-#include <cmath>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/meta/enable_if.hpp"
-#include "viennacl/meta/tag_of.hpp"
-
-namespace viennacl
-{
-  //
-  // generic norm_1 function
-  //   uses tag dispatch to identify which algorithm
-  //   should be called
-  //
-  namespace linalg
-  {
-
-    #ifdef VIENNACL_WITH_UBLAS
-    // ----------------------------------------------------
-    // UBLAS
-    //
-    template< typename VectorT >
-    typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value,
-                                  typename VectorT::value_type
-                                >::type
-    norm_1(VectorT const& vector)
-    {
-      // std::cout << "ublas .. " << std::endl;
-      return boost::numeric::ublas::norm_1(vector);
-    }
-    #endif
-
-
-    // ----------------------------------------------------
-    // STL
-    //
-    template< typename T, typename A >
-    T norm_1(std::vector<T, A> const & v1)
-    {
-      //std::cout << "stl .. " << std::endl;
-      T result = 0;
-      for (typename std::vector<T, A>::size_type i=0; i<v1.size(); ++i)
-        result += std::fabs(v1[i]);
-
-      return result;
-    }
-
-    // ----------------------------------------------------
-    // VIENNACL
-    //
-    template< typename ScalarType>
-    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                 const viennacl::vector_base<ScalarType>,
-                                 viennacl::op_norm_1 >
-    norm_1(viennacl::vector_base<ScalarType> const & vector)
-    {
-      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                          const viennacl::vector_base<ScalarType>,
-                                          viennacl::op_norm_1 >(vector, vector);
-    }
-
-    // with vector expression:
-    template<typename LHS, typename RHS, typename OP>
-    viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                viennacl::op_norm_1>
-    norm_1(viennacl::vector_expression<const LHS, const RHS, OP> const & vector)
-    {
-      return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                          const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                          viennacl::op_norm_1 >(vector, vector);
-    }
-
-  } // end namespace linalg
-} // end namespace viennacl
-#endif
-
-
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp
deleted file mode 100644
index babb285..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp
+++ /dev/null
@@ -1,140 +0,0 @@
-#ifndef VIENNACL_LINALG_NORM_2_HPP_
-#define VIENNACL_LINALG_NORM_2_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file norm_2.hpp
-    @brief Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations.
-*/
-
-#include <cmath>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/meta/enable_if.hpp"
-#include "viennacl/meta/tag_of.hpp"
-
-namespace viennacl
-{
-  //
-  // generic norm_2 function
-  //   uses tag dispatch to identify which algorithm
-  //   should be called
-  //
-  namespace linalg
-  {
-    #ifdef VIENNACL_WITH_MTL4
-    // ----------------------------------------------------
-    // MTL4
-    //
-    template< typename VectorT >
-    typename viennacl::enable_if< viennacl::is_mtl4< typename viennacl::traits::tag_of< VectorT >::type >::value,
-                                  typename VectorT::value_type>::type
-    norm_2(VectorT const & v)
-    {
-      return mtl::two_norm(v);
-    }
-    #endif
-
-    #ifdef VIENNACL_WITH_ARMADILLO
-    // ----------------------------------------------------
-    // Armadillo
-    //
-    template<typename NumericT>
-    NumericT norm_2(arma::Col<NumericT> const& v)
-    {
-      return norm(v);
-    }
-    #endif
-
-    #ifdef VIENNACL_WITH_EIGEN
-    // ----------------------------------------------------
-    // EIGEN
-    //
-    template< typename VectorT >
-    typename viennacl::enable_if< viennacl::is_eigen< typename viennacl::traits::tag_of< VectorT >::type >::value,
-                                  typename VectorT::RealScalar>::type
-    norm_2(VectorT const & v)
-    {
-      return v.norm();
-    }
-    #endif
-
-
-    #ifdef VIENNACL_WITH_UBLAS
-    // ----------------------------------------------------
-    // UBLAS
-    //
-    template< typename VectorT >
-    typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value,
-                                  typename VectorT::value_type>::type
-    norm_2(VectorT const & v)
-    {
-      return boost::numeric::ublas::norm_2(v);
-    }
-    #endif
-
-
-    // ----------------------------------------------------
-    // STL
-    //
-    template< typename T, typename A >
-    T norm_2(std::vector<T, A> const & v1)
-    {
-      T result = 0;
-      for (typename std::vector<T, A>::size_type i=0; i<v1.size(); ++i)
-        result += v1[i] * v1[i];
-
-      return std::sqrt(result);
-    }
-
-    // ----------------------------------------------------
-    // VIENNACL
-    //
-    template< typename ScalarType>
-    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                 const viennacl::vector_base<ScalarType>,
-                                 viennacl::op_norm_2 >
-    norm_2(viennacl::vector_base<ScalarType> const & v)
-    {
-       //std::cout << "viennacl .. " << std::endl;
-      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
-                                          const viennacl::vector_base<ScalarType>,
-                                          viennacl::op_norm_2 >(v, v);
-    }
-
-    // with vector expression:
-    template<typename LHS, typename RHS, typename OP>
-    viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                viennacl::op_norm_2>
-    norm_2(viennacl::vector_expression<const LHS, const RHS, OP> const & vector)
-    {
-      return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                          const viennacl::vector_expression<const LHS, const RHS, OP>,
-                                          viennacl::op_norm_2>(vector, vector);
-    }
-
-
-  } // end namespace linalg
-} // end namespace viennacl
-#endif
-
-
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp
deleted file mode 100644
index 6873a53..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef VIENNACL_LINALG_NORM_FROBENIUS_HPP_
-#define VIENNACL_LINALG_NORM_FROBENIUS_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/norm_frobenius.hpp
-    @brief Generic interface for the Frobenius norm.
-*/
-
-#include <cmath>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/meta/enable_if.hpp"
-#include "viennacl/meta/tag_of.hpp"
-
-namespace viennacl
-{
-  //
-  // generic norm_frobenius function
-  //   uses tag dispatch to identify which algorithm
-  //   should be called
-  //
-  namespace linalg
-  {
-
-    #ifdef VIENNACL_WITH_UBLAS
-    // ----------------------------------------------------
-    // UBLAS
-    //
-    template< typename VectorT >
-    typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value,
-                                  typename VectorT::value_type
-                                >::type
-    norm_frobenius(VectorT const& v1)
-    {
-      return boost::numeric::ublas::norm_frobenius(v1);
-    }
-    #endif
-
-
-    // ----------------------------------------------------
-    // VIENNACL
-    //
-    template<typename NumericT>
-    scalar_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_norm_frobenius>
-    norm_frobenius(const matrix_base<NumericT> & A)
-    {
-      return scalar_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_norm_frobenius>(A, A);
-    }
-
-  } // end namespace linalg
-} // end namespace viennacl
-#endif
-
-
-
-
-