You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2016/06/10 16:52:52 UTC
[47/51] [partial] mahout git commit: Revert "(nojira) add native-viennaCL module to codebase. closes apache/mahout#241"

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp
deleted file mode 100644
index e463e88..0000000
--- a/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/* =========================================================================
-   Copyright (c) 2010-2014, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the PDF manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-#include "viennacl.hpp"
-#include "viennacl/backend/mem_handle.hpp"
-
-
-
-static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A)
-{
-#ifdef VIENNACL_WITH_CUDA
-  h.switch_active_handle_id(viennacl::CUDA_MEMORY);
-  h.cuda_handle().reset(A->cuda_mem);
-  h.cuda_handle().inc();
-  if (A->precision == ViennaCLFloat)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness
-  else if (A->precision == ViennaCLDouble)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness
-  else
-    return ViennaCLGenericFailure;
-
-  return ViennaCLSuccess;
-#else
-  (void)h;
-  (void)A;
-  return ViennaCLGenericFailure;
-#endif
-}
-
-static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A)
-{
-#ifdef VIENNACL_WITH_OPENCL
-  h.switch_active_handle_id(viennacl::OPENCL_MEMORY);
-  h.opencl_handle() = A->opencl_mem;
-  h.opencl_handle().inc();
-  if (A->precision == ViennaCLFloat)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness
-  else if (A->precision == ViennaCLDouble)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness
-  else
-    return ViennaCLGenericFailure;
-
-  return ViennaCLSuccess;
-#else
-  (void)h;
-  (void)A;
-  return ViennaCLGenericFailure;
-#endif
-}
-
-
-static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A)
-{
-  h.switch_active_handle_id(viennacl::MAIN_MEMORY);
-  h.ram_handle().reset(A->host_mem);
-  h.ram_handle().inc();
-  if (A->precision == ViennaCLFloat)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness
-  else if (A->precision == ViennaCLDouble)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness
-  else
-    return ViennaCLGenericFailure;
-
-  return ViennaCLSuccess;
-}
-
-
-static ViennaCLStatus init_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A)
-{
-  switch (A->backend->backend_type)
-  {
-    case ViennaCLCUDA:
-      return init_cuda_matrix(h, A);
-
-    case ViennaCLOpenCL:
-      return init_opencl_matrix(h, A);
-
-    case ViennaCLHost:
-      return init_host_matrix(h, A);
-
-    default:
-      return ViennaCLGenericFailure;
-  }
-}
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp
deleted file mode 100644
index 8be00d7..0000000
--- a/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/* =========================================================================
-   Copyright (c) 2010-2014, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the PDF manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-#include "viennacl.hpp"
-#include "viennacl/backend/mem_handle.hpp"
-
-
-
-static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
-{
-#ifdef VIENNACL_WITH_CUDA
-  h.switch_active_handle_id(viennacl::CUDA_MEMORY);
-  h.cuda_handle().reset(x->cuda_mem);
-  h.cuda_handle().inc();
-  if (x->precision == ViennaCLFloat)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * x->size * sizeof(float)); // not necessary, but still set for conciseness
-  else if (x->precision == ViennaCLDouble)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * x->size * sizeof(double)); // not necessary, but still set for conciseness
-  else
-    return ViennaCLGenericFailure;
-
-  return ViennaCLSuccess;
-#else
-  (void)h;
-  (void)x;
-  return ViennaCLGenericFailure;
-#endif
-}
-
-static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
-{
-#ifdef VIENNACL_WITH_OPENCL
-  h.switch_active_handle_id(viennacl::OPENCL_MEMORY);
-  h.opencl_handle() = x->opencl_mem;
-  h.opencl_handle().inc();
-  if (x->precision == ViennaCLFloat)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(float)); // not necessary, but still set for conciseness
-  else if (x->precision == ViennaCLDouble)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(double)); // not necessary, but still set for conciseness
-  else
-    return ViennaCLGenericFailure;
-
-  return ViennaCLSuccess;
-#else
-  (void)h;
-  (void)x;
-  return ViennaCLGenericFailure;
-#endif
-}
-
-
-static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
-{
-  h.switch_active_handle_id(viennacl::MAIN_MEMORY);
-  h.ram_handle().reset(x->host_mem);
-  h.ram_handle().inc();
-  if (x->precision == ViennaCLFloat)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(float)); // not necessary, but still set for conciseness
-  else if (x->precision == ViennaCLDouble)
-    h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(double)); // not necessary, but still set for conciseness
-  else
-    return ViennaCLGenericFailure;
-
-  return ViennaCLSuccess;
-}
-
-
-static ViennaCLStatus init_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
-{
-  switch (x->backend->backend_type)
-  {
-    case ViennaCLCUDA:
-      return init_cuda_vector(h, x);
-
-    case ViennaCLOpenCL:
-      return init_opencl_vector(h, x);
-
-    case ViennaCLHost:
-      return init_host_vector(h, x);
-
-    default:
-      return ViennaCLGenericFailure;
-  }
-}
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp
deleted file mode 100644
index c66c848..0000000
--- a/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp
+++ /dev/null
@@ -1,141 +0,0 @@
-#ifndef VIENNACL_VIENNACL_PRIVATE_HPP
-#define VIENNACL_VIENNACL_PRIVATE_HPP
-
-
-/* =========================================================================
-   Copyright (c) 2010-2014, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the PDF manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-#include <stdlib.h>
-
-#ifdef VIENNACL_WITH_OPENCL
-#ifdef __APPLE__
-#include <OpenCL/cl.h>
-#else
-#include <CL/cl.h>
-#endif
-#endif
-
-#include "viennacl.hpp"
-
-
-/************* Backend Management ******************/
-
-struct ViennaCLCUDABackend_impl
-{
-    //TODO: Add stream and/or device descriptors here
-};
-
-struct ViennaCLOpenCLBackend_impl
-{
-  ViennaCLInt context_id;
-};
-
-struct ViennaCLHostBackend_impl
-{
-  // Nothing to specify *at the moment*
-};
-
-
-/** @brief Generic backend for CUDA, OpenCL, host-based stuff */
-struct ViennaCLBackend_impl
-{
-  ViennaCLBackendTypes backend_type;
-
-  ViennaCLCUDABackend_impl     cuda_backend;
-  ViennaCLOpenCLBackend_impl   opencl_backend;
-  ViennaCLHostBackend_impl     host_backend;
-};
-
-
-
-/******** User Types **********/
-
-struct ViennaCLHostScalar_impl
-{
-  ViennaCLPrecision  precision;
-
-  union {
-    float  value_float;
-    double value_double;
-  };
-};
-
-struct ViennaCLScalar_impl
-{
-  ViennaCLBackend    backend;
-  ViennaCLPrecision  precision;
-
-  // buffer:
-#ifdef VIENNACL_WITH_CUDA
-  char * cuda_mem;
-#endif
-#ifdef VIENNACL_WITH_OPENCL
-  cl_mem opencl_mem;
-#endif
-  char * host_mem;
-
-  ViennaCLInt   offset;
-};
-
-struct ViennaCLVector_impl
-{
-  ViennaCLBackend    backend;
-  ViennaCLPrecision  precision;
-
-  // buffer:
-#ifdef VIENNACL_WITH_CUDA
-  char * cuda_mem;
-#endif
-#ifdef VIENNACL_WITH_OPENCL
-  cl_mem opencl_mem;
-#endif
-  char * host_mem;
-
-  ViennaCLInt   offset;
-  ViennaCLInt   inc;
-  ViennaCLInt   size;
-};
-
-struct ViennaCLMatrix_impl
-{
-  ViennaCLBackend    backend;
-  ViennaCLPrecision  precision;
-  ViennaCLOrder      order;
-  ViennaCLTranspose  trans;
-
-  // buffer:
-#ifdef VIENNACL_WITH_CUDA
-  char * cuda_mem;
-#endif
-#ifdef VIENNACL_WITH_OPENCL
-  cl_mem opencl_mem;
-#endif
-  char * host_mem;
-
-  ViennaCLInt   size1;
-  ViennaCLInt   start1;
-  ViennaCLInt   stride1;
-  ViennaCLInt   internal_size1;
-
-  ViennaCLInt   size2;
-  ViennaCLInt   start2;
-  ViennaCLInt   stride2;
-  ViennaCLInt   internal_size2;
-};
-
-
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp
deleted file mode 100644
index ccfd035..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp
+++ /dev/null
@@ -1,171 +0,0 @@
-#ifndef VIENNACL_BACKEND_CPU_RAM_HPP_
-#define VIENNACL_BACKEND_CPU_RAM_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/backend/cpu_ram.hpp
-    @brief Implementations for the OpenCL backend functionality
-*/
-
-#include <cassert>
-#include <vector>
-#ifdef VIENNACL_WITH_AVX2
-#include <stdlib.h>
-#endif
-
-#include "viennacl/forwards.h"
-#include "viennacl/tools/shared_ptr.hpp"
-
-namespace viennacl
-{
-namespace backend
-{
-namespace cpu_ram
-{
-typedef viennacl::tools::shared_ptr<char>  handle_type;
-// Requirements for backend:
-
-// * memory_create(size, host_ptr)
-// * memory_copy(src, dest, offset_src, offset_dest, size)
-// * memory_write_from_main_memory(src, offset, size,
-//                                 dest, offset, size)
-// * memory_read_to_main_memory(src, offset, size
-//                              dest, offset, size)
-// *
-//
-
-namespace detail
-{
-  /** @brief Helper struct for deleting an pointer to an array */
-  template<class U>
-  struct array_deleter
-  {
-#ifdef VIENNACL_WITH_AVX2
-    void operator()(U* p) const { free(p); }
-#else
-    void operator()(U* p) const { delete[] p; }
-#endif
-  };
-
-}
-
-/** @brief Creates an array of the specified size in main RAM. If the second argument is provided, the buffer is initialized with data from that pointer.
- *
- * @param size_in_bytes   Number of bytes to allocate
- * @param host_ptr        Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
- *
- */
-inline handle_type  memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
-{
-#ifdef VIENNACL_WITH_AVX2
-  // Note: aligned_alloc not available on all compilers. Consider platform-specific alternatives such as posix_memalign()
-  if (!host_ptr)
-    return handle_type(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>());
-
-  handle_type new_handle(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>());
-#else
-  if (!host_ptr)
-    return handle_type(new char[size_in_bytes], detail::array_deleter<char>());
-
-  handle_type new_handle(new char[size_in_bytes], detail::array_deleter<char>());
-#endif
-
-  // copy data:
-  char * raw_ptr = new_handle.get();
-  const char * data_ptr = static_cast<const char *>(host_ptr);
-#ifdef VIENNACL_WITH_OPENMP
-    #pragma omp parallel for
-#endif
-  for (long i=0; i<long(size_in_bytes); ++i)
-    raw_ptr[i] = data_ptr[i];
-
-  return new_handle;
-}
-
-/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'.
- *
- *  @param src_buffer     A smart pointer to the begin of an allocated buffer
- *  @param dst_buffer     A smart pointer to the end of an allocated buffer
- *  @param src_offset     Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
- *  @param dst_offset     Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
- *  @param bytes_to_copy  Number of bytes to be copied
- */
-inline void memory_copy(handle_type const & src_buffer,
-                        handle_type & dst_buffer,
-                        vcl_size_t src_offset,
-                        vcl_size_t dst_offset,
-                        vcl_size_t bytes_to_copy)
-{
-  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
-  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
-
-#ifdef VIENNACL_WITH_OPENMP
-  #pragma omp parallel for
-#endif
-  for (long i=0; i<long(bytes_to_copy); ++i)
-    dst_buffer.get()[vcl_size_t(i)+dst_offset] = src_buffer.get()[vcl_size_t(i) + src_offset];
-}
-
-/** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'
- *
- * @param dst_buffer    A smart pointer to the beginning of an allocated buffer
- * @param dst_offset    Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
- * @param bytes_to_copy Number of bytes to be copied
- * @param ptr           Pointer to the first byte to be written
- */
-inline void memory_write(handle_type & dst_buffer,
-                         vcl_size_t dst_offset,
-                         vcl_size_t bytes_to_copy,
-                         const void * ptr,
-                         bool /*async*/)
-{
-  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
-
-#ifdef VIENNACL_WITH_OPENMP
-  #pragma omp parallel for
-#endif
-  for (long i=0; i<long(bytes_to_copy); ++i)
-    dst_buffer.get()[vcl_size_t(i)+dst_offset] = static_cast<const char *>(ptr)[i];
-}
-
-/** @brief Reads data from a buffer back to main RAM.
- *
- * @param src_buffer         A smart pointer to the beginning of an allocated source buffer
- * @param src_offset         Offset of the first byte to be read from the beginning of src_buffer (in bytes_
- * @param bytes_to_copy      Number of bytes to be read
- * @param ptr                Location in main RAM where to read data should be written to
- */
-inline void memory_read(handle_type const & src_buffer,
-                        vcl_size_t src_offset,
-                        vcl_size_t bytes_to_copy,
-                        void * ptr,
-                        bool /*async*/)
-{
-  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
-
-#ifdef VIENNACL_WITH_OPENMP
-  #pragma omp parallel for
-#endif
-  for (long i=0; i<long(bytes_to_copy); ++i)
-    static_cast<char *>(ptr)[i] = src_buffer.get()[vcl_size_t(i)+src_offset];
-}
-
-}
-} //backend
-} //viennacl
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp
deleted file mode 100644
index 641bfea..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp
+++ /dev/null
@@ -1,206 +0,0 @@
-#ifndef VIENNACL_BACKEND_CUDA_HPP_
-#define VIENNACL_BACKEND_CUDA_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/backend/cuda.hpp
-    @brief Implementations for the CUDA backend functionality
-*/
-
-
-#include <iostream>
-#include <vector>
-#include <cassert>
-#include <stdexcept>
-#include <sstream>
-
-#include "viennacl/forwards.h"
-#include "viennacl/tools/shared_ptr.hpp"
-
-// includes CUDA
-#include <cuda_runtime.h>
-
-#define VIENNACL_CUDA_ERROR_CHECK(err)  detail::cuda_error_check (err, __FILE__, __LINE__)
-
-namespace viennacl
-{
-namespace backend
-{
-namespace cuda
-{
-
-typedef viennacl::tools::shared_ptr<char>  handle_type;
-// Requirements for backend:
-
-// * memory_create(size, host_ptr)
-// * memory_copy(src, dest, offset_src, offset_dest, size)
-// * memory_write_from_main_memory(src, offset, size,
-//                                 dest, offset, size)
-// * memory_read_to_main_memory(src, offset, size
-//                              dest, offset, size)
-// *
-//
-
-class cuda_exception : public std::runtime_error
-{
-public:
-  cuda_exception(std::string const & what_arg, cudaError_t err_code) : std::runtime_error(what_arg), error_code_(err_code) {}
-
-  cudaError_t error_code() const { return error_code_; }
-
-private:
-  cudaError_t error_code_;
-};
-
-namespace detail
-{
-
-  inline void cuda_error_check(cudaError error_code, const char *file, const int line )
-  {
-    if (cudaSuccess != error_code)
-    {
-      std::stringstream ss;
-      ss << file << "(" << line << "): " << ": CUDA Runtime API error " << error_code << ": " << cudaGetErrorString( error_code ) << std::endl;
-      throw viennacl::backend::cuda::cuda_exception(ss.str(), error_code);
-    }
-  }
-
-
-  /** @brief Functor for deleting a CUDA handle. Used within the smart pointer class. */
-  template<typename U>
-  struct cuda_deleter
-  {
-    void operator()(U * p) const
-    {
-      //std::cout << "Freeing handle " << reinterpret_cast<void *>(p) << std::endl;
-      cudaFree(p);
-    }
-  };
-
-}
-
-/** @brief Creates an array of the specified size on the CUDA device. If the second argument is provided, the buffer is initialized with data from that pointer.
- *
- * @param size_in_bytes   Number of bytes to allocate
- * @param host_ptr        Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
- *
- */
-inline handle_type  memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
-{
-  void * dev_ptr = NULL;
-  VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) );
-  //std::cout << "Allocated new dev_ptr " << dev_ptr << " of size " <<  size_in_bytes << std::endl;
-
-  if (!host_ptr)
-    return handle_type(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());
-
-  handle_type new_handle(reinterpret_cast<char*>(dev_ptr), detail::cuda_deleter<char>());
-
-  // copy data:
-  //std::cout << "Filling new handle from host_ptr " << host_ptr << std::endl;
-  cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice);
-
-  return new_handle;
-}
-
-
-/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory starting at address 'dst_buffer + dst_offset' on the same CUDA device.
- *
- *  @param src_buffer     A smart pointer to the begin of an allocated CUDA buffer
- *  @param dst_buffer     A smart pointer to the end of an allocated CUDA buffer
- *  @param src_offset     Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
- *  @param dst_offset     Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
- *  @param bytes_to_copy  Number of bytes to be copied
- */
-inline void memory_copy(handle_type const & src_buffer,
-                        handle_type & dst_buffer,
-                        vcl_size_t src_offset,
-                        vcl_size_t dst_offset,
-                        vcl_size_t bytes_to_copy)
-{
-  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
-  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
-
-  cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset),
-             reinterpret_cast<void *>(src_buffer.get() + src_offset),
-             bytes_to_copy,
-             cudaMemcpyDeviceToDevice);
-}
-
-
-/** @brief Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'
- *
- * @param dst_buffer    A smart pointer to the beginning of an allocated CUDA buffer
- * @param dst_offset    Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
- * @param bytes_to_copy Number of bytes to be copied
- * @param ptr           Pointer to the first byte to be written
- * @param async              Whether the operation should be asynchronous
- */
-inline void memory_write(handle_type & dst_buffer,
-                         vcl_size_t dst_offset,
-                         vcl_size_t bytes_to_copy,
-                         const void * ptr,
-                         bool async = false)
-{
-  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
-
-  if (async)
-    cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
-                    reinterpret_cast<const char *>(ptr),
-                    bytes_to_copy,
-                    cudaMemcpyHostToDevice);
-  else
-    cudaMemcpy(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
-               reinterpret_cast<const char *>(ptr),
-               bytes_to_copy,
-               cudaMemcpyHostToDevice);
-}
-
-
-/** @brief Reads data from a CUDA buffer back to main RAM.
- *
- * @param src_buffer         A smart pointer to the beginning of an allocated CUDA source buffer
- * @param src_offset         Offset of the first byte to be read from the beginning of src_buffer (in bytes_
- * @param bytes_to_copy      Number of bytes to be read
- * @param ptr                Location in main RAM where to read data should be written to
- * @param async              Whether the operation should be asynchronous
- */
-inline void memory_read(handle_type const & src_buffer,
-                        vcl_size_t src_offset,
-                        vcl_size_t bytes_to_copy,
-                        void * ptr,
-                        bool async = false)
-{
-  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
-
-  if (async)
-    cudaMemcpyAsync(reinterpret_cast<char *>(ptr),
-                    reinterpret_cast<char *>(src_buffer.get()) + src_offset,
-                    bytes_to_copy,
-                    cudaMemcpyDeviceToHost);
-  else
-    cudaMemcpy(reinterpret_cast<char *>(ptr),
-               reinterpret_cast<char *>(src_buffer.get()) + src_offset,
-               bytes_to_copy,
-               cudaMemcpyDeviceToHost);
-}
-
-} //cuda
-} //backend
-} //viennacl
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp
deleted file mode 100644
index 37c680b..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp
+++ /dev/null
@@ -1,250 +0,0 @@
-#ifndef VIENNACL_BACKEND_MEM_HANDLE_HPP
-#define VIENNACL_BACKEND_MEM_HANDLE_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/backend/mem_handle.hpp
-    @brief Implements the multi-memory-domain handle
-*/
-
-#include <vector>
-#include <cassert>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/shared_ptr.hpp"
-#include "viennacl/backend/cpu_ram.hpp"
-
-#ifdef VIENNACL_WITH_OPENCL
-#include "viennacl/backend/opencl.hpp"
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-#include "viennacl/backend/cuda.hpp"
-#endif
-
-
-namespace viennacl
-{
-namespace backend
-{
-
-namespace detail
-{
-  /** @brief Singleton for managing the default memory type.
-  *
-  * @param new_mem_type    If NULL, returns the current memory type. Otherwise, sets the memory type to the provided value.
-  */
-  inline memory_types get_set_default_memory_type(memory_types * new_mem_type)
-  {
-    // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default
-#ifdef VIENNACL_WITH_CUDA
-    static memory_types mem_type = CUDA_MEMORY;
-#elif defined(VIENNACL_WITH_OPENCL)
-    static memory_types mem_type = OPENCL_MEMORY;
-#else
-    static memory_types mem_type = MAIN_MEMORY;
-#endif
-
-    if (new_mem_type)
-      mem_type = *new_mem_type;
-
-    return mem_type;
-  }
-}
-
-/** @brief Returns the default memory type for the given configuration.
- *
- * CUDA has precedence over OpenCL, which has precedence over main memory. Depends on which VIENNACL_WITH_{CUDA/OPENCL/OPENMP} macros are defined.
- */
-inline memory_types default_memory_type() { return detail::get_set_default_memory_type(NULL); }
-
-/** @brief Sets the default memory type for the given configuration.
- *
- * Make sure the respective new memory type is enabled.
- * For example, passing CUDA_MEMORY if no CUDA backend is selected will result in exceptions being thrown as soon as you try to allocate buffers.
- */
-inline memory_types default_memory_type(memory_types new_memory_type) { return detail::get_set_default_memory_type(&new_memory_type); }
-
-
-/** @brief Main abstraction class for multiple memory domains. Represents a buffer in either main RAM, an OpenCL context, or a CUDA device.
- *
- * The idea is to wrap all possible handle types inside this class so that higher-level code does not need to be cluttered with preprocessor switches.
- * Instead, this class collects all the necessary conditional compilations.
- *
- */
-class mem_handle
-{
-public:
-  typedef viennacl::tools::shared_ptr<char>      ram_handle_type;
-  typedef viennacl::tools::shared_ptr<char>      cuda_handle_type;
-
-  /** @brief Default CTOR. No memory is allocated */
-  mem_handle() : active_handle_(MEMORY_NOT_INITIALIZED), size_in_bytes_(0) {}
-
-  /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. */
-  ram_handle_type       & ram_handle()       { return ram_handle_; }
-  /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. */
-  ram_handle_type const & ram_handle() const { return ram_handle_; }
-
-#ifdef VIENNACL_WITH_OPENCL
-  /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */
-  viennacl::ocl::handle<cl_mem>       & opencl_handle()       { return opencl_handle_; }
-  /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */
-  viennacl::ocl::handle<cl_mem> const & opencl_handle() const { return opencl_handle_; }
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-  /** @brief Returns the handle to a CUDA buffer. The handle contains NULL if no such buffer has been allocated. */
-  cuda_handle_type       & cuda_handle()       { return cuda_handle_; }
-  /** @brief Returns the handle to a CUDA buffer. The handle contains NULL if no such buffer has been allocated. */
-  cuda_handle_type const & cuda_handle() const { return cuda_handle_; }
-#endif
-
-  /** @brief Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no data. */
-  memory_types get_active_handle_id() const { return active_handle_; }
-
-  /** @brief Switches the currently active handle. If no support for that backend is provided, an exception is thrown. */
-  void switch_active_handle_id(memory_types new_id)
-  {
-    if (new_id != active_handle_)
-    {
-      if (active_handle_ == MEMORY_NOT_INITIALIZED)
-        active_handle_ = new_id;
-      else if (active_handle_ == MAIN_MEMORY)
-      {
-        active_handle_ = new_id;
-      }
-      else if (active_handle_ == OPENCL_MEMORY)
-      {
-#ifdef VIENNACL_WITH_OPENCL
-        active_handle_ = new_id;
-#else
-        throw memory_exception("compiled without OpenCL suppport!");
-#endif
-      }
-      else if (active_handle_ == CUDA_MEMORY)
-      {
-#ifdef VIENNACL_WITH_CUDA
-        active_handle_ = new_id;
-#else
-        throw memory_exception("compiled without CUDA suppport!");
-#endif
-      }
-      else
-        throw memory_exception("invalid new memory region!");
-    }
-  }
-
-  /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point to the same buffer. */
-  bool operator==(mem_handle const & other) const
-  {
-    if (active_handle_ != other.active_handle_)
-      return false;
-
-    switch (active_handle_)
-    {
-    case MAIN_MEMORY:
-      return ram_handle_.get() == other.ram_handle_.get();
-#ifdef VIENNACL_WITH_OPENCL
-    case OPENCL_MEMORY:
-      return opencl_handle_.get() == other.opencl_handle_.get();
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    case CUDA_MEMORY:
-      return cuda_handle_.get() == other.cuda_handle_.get();
-#endif
-    default: break;
-    }
-
-    return false;
-  }
-
-  /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point a buffer with inferior address
-     * useful to store handles into a map, since they naturally have strong ordering
-     */
-  bool operator<(mem_handle const & other) const
-  {
-    if (active_handle_ != other.active_handle_)
-      return false;
-
-    switch (active_handle_)
-    {
-    case MAIN_MEMORY:
-      return ram_handle_.get() < other.ram_handle_.get();
-#ifdef VIENNACL_WITH_OPENCL
-    case OPENCL_MEMORY:
-      return opencl_handle_.get() < other.opencl_handle_.get();
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    case CUDA_MEMORY:
-      return cuda_handle_.get() < other.cuda_handle_.get();
-#endif
-    default: break;
-    }
-
-    return false;
-  }
-
-
-  bool operator!=(mem_handle const & other) const { return !(*this == other); }
-
-  /** @brief Implements a fast swapping method. No data is copied, only the handles are exchanged. */
-  void swap(mem_handle & other)
-  {
-    // swap handle type:
-    memory_types active_handle_tmp = other.active_handle_;
-    other.active_handle_ = active_handle_;
-    active_handle_ = active_handle_tmp;
-
-    // swap ram handle:
-    ram_handle_type ram_handle_tmp = other.ram_handle_;
-    other.ram_handle_ = ram_handle_;
-    ram_handle_ = ram_handle_tmp;
-
-    // swap OpenCL handle:
-#ifdef VIENNACL_WITH_OPENCL
-    opencl_handle_.swap(other.opencl_handle_);
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    cuda_handle_type cuda_handle_tmp = other.cuda_handle_;
-    other.cuda_handle_ = cuda_handle_;
-    cuda_handle_ = cuda_handle_tmp;
-#endif
-  }
-
-  /** @brief Returns the number of bytes of the currently active buffer */
-  vcl_size_t raw_size() const               { return size_in_bytes_; }
-
-  /** @brief Sets the size of the currently active buffer. Use with care! */
-  void        raw_size(vcl_size_t new_size) { size_in_bytes_ = new_size; }
-
-private:
-  memory_types active_handle_;
-  ram_handle_type ram_handle_;
-#ifdef VIENNACL_WITH_OPENCL
-  viennacl::ocl::handle<cl_mem> opencl_handle_;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-  cuda_handle_type        cuda_handle_;
-#endif
-  vcl_size_t size_in_bytes_;
-};
-
-
-} //backend
-} //viennacl
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp
deleted file mode 100644
index d6f29a5..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp
+++ /dev/null
@@ -1,628 +0,0 @@
-#ifndef VIENNACL_BACKEND_MEMORY_HPP
-#define VIENNACL_BACKEND_MEMORY_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/backend/memory.hpp
-    @brief Main interface routines for memory management
-*/
-
-#include <vector>
-#include <cassert>
-#include "viennacl/forwards.h"
-#include "viennacl/backend/mem_handle.hpp"
-#include "viennacl/context.hpp"
-#include "viennacl/traits/handle.hpp"
-#include "viennacl/traits/context.hpp"
-#include "viennacl/backend/util.hpp"
-
-#include "viennacl/backend/cpu_ram.hpp"
-
-#ifdef VIENNACL_WITH_OPENCL
-#include "viennacl/backend/opencl.hpp"
-#include "viennacl/ocl/backend.hpp"
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-#include "viennacl/backend/cuda.hpp"
-#endif
-
-
-namespace viennacl
-{
-namespace backend
-{
-
-
-  // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default
-  /** @brief Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. */
-  inline void finish()
-  {
-#ifdef VIENNACL_WITH_CUDA
-    cudaDeviceSynchronize();
-#endif
-#ifdef VIENNACL_WITH_OPENCL
-    viennacl::ocl::get_queue().finish();
-#endif
-  }
-
-
-
-
-  // Requirements for backend:
-
-  // ---- Memory ----
-  //
-  // * memory_create(size, host_ptr)
-  // * memory_copy(src, dest, offset_src, offset_dest, size)
-  // * memory_write(src, offset, size, ptr)
-  // * memory_read(src, offset, size, ptr)
-  //
-
-  /** @brief Creates an array of the specified size. If the second argument is provided, the buffer is initialized with data from that pointer.
-  *
-  * This is the generic version for CPU RAM, CUDA, and OpenCL. Creates the memory in the currently active memory domain.
-  *
-  * @param handle          The generic wrapper handle for multiple memory domains which will hold the new buffer.
-  * @param size_in_bytes   Number of bytes to allocate
-  * @param ctx             Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host)
-  * @param host_ptr        Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
-  *
-  */
-  inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL)
-  {
-    if (size_in_bytes > 0)
-    {
-      if (handle.get_active_handle_id() == MEMORY_NOT_INITIALIZED)
-        handle.switch_active_handle_id(ctx.memory_type());
-
-      switch (handle.get_active_handle_id())
-      {
-      case MAIN_MEMORY:
-        handle.ram_handle() = cpu_ram::memory_create(size_in_bytes, host_ptr);
-        handle.raw_size(size_in_bytes);
-        break;
-#ifdef VIENNACL_WITH_OPENCL
-      case OPENCL_MEMORY:
-        handle.opencl_handle().context(ctx.opencl_context());
-        handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr);
-        handle.raw_size(size_in_bytes);
-        break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      case CUDA_MEMORY:
-        handle.cuda_handle() = cuda::memory_create(size_in_bytes, host_ptr);
-        handle.raw_size(size_in_bytes);
-        break;
-#endif
-      case MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("unknown memory handle!");
-      }
-    }
-  }
-
-  /*
-  inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, const void * host_ptr = NULL)
-  {
-    viennacl::context  ctx(default_memory_type());
-    memory_create(handle, size_in_bytes, ctx, host_ptr);
-  }*/
-
-
-  /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'.
-  *
-  * This is the generic version for CPU RAM, CUDA, and OpenCL. Copies the memory in the currently active memory domain.
-  *
-  *
-  *  @param src_buffer     A smart pointer to the begin of an allocated buffer
-  *  @param dst_buffer     A smart pointer to the end of an allocated buffer
-  *  @param src_offset     Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
-  *  @param dst_offset     Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
-  *  @param bytes_to_copy  Number of bytes to be copied
-  */
-  inline void memory_copy(mem_handle const & src_buffer,
-                          mem_handle & dst_buffer,
-                          vcl_size_t src_offset,
-                          vcl_size_t dst_offset,
-                          vcl_size_t bytes_to_copy)
-  {
-    assert( src_buffer.get_active_handle_id() == dst_buffer.get_active_handle_id() && bool("memory_copy() must be called on buffers from the same domain") );
-
-    if (bytes_to_copy > 0)
-    {
-      switch (src_buffer.get_active_handle_id())
-      {
-      case MAIN_MEMORY:
-        cpu_ram::memory_copy(src_buffer.ram_handle(), dst_buffer.ram_handle(), src_offset, dst_offset, bytes_to_copy);
-        break;
-#ifdef VIENNACL_WITH_OPENCL
-      case OPENCL_MEMORY:
-        opencl::memory_copy(src_buffer.opencl_handle(), dst_buffer.opencl_handle(), src_offset, dst_offset, bytes_to_copy);
-        break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      case CUDA_MEMORY:
-        cuda::memory_copy(src_buffer.cuda_handle(), dst_buffer.cuda_handle(), src_offset, dst_offset, bytes_to_copy);
-        break;
-#endif
-      case MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("unknown memory handle!");
-      }
-    }
-  }
-
-  // TODO: Refine this concept. Maybe move to constructor?
-  /** @brief A 'shallow' copy operation from an initialized buffer to an uninitialized buffer.
-   * The uninitialized buffer just copies the raw handle.
-   */
-  inline void memory_shallow_copy(mem_handle const & src_buffer,
-                                  mem_handle & dst_buffer)
-  {
-    assert( (dst_buffer.get_active_handle_id() == MEMORY_NOT_INITIALIZED) && bool("Shallow copy on already initialized memory not supported!"));
-
-    switch (src_buffer.get_active_handle_id())
-    {
-    case MAIN_MEMORY:
-      dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id());
-      dst_buffer.ram_handle() = src_buffer.ram_handle();
-      dst_buffer.raw_size(src_buffer.raw_size());
-      break;
-#ifdef VIENNACL_WITH_OPENCL
-    case OPENCL_MEMORY:
-      dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id());
-      dst_buffer.opencl_handle() = src_buffer.opencl_handle();
-      dst_buffer.raw_size(src_buffer.raw_size());
-      break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    case CUDA_MEMORY:
-      dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id());
-      dst_buffer.cuda_handle() = src_buffer.cuda_handle();
-      dst_buffer.raw_size(src_buffer.raw_size());
-      break;
-#endif
-    case MEMORY_NOT_INITIALIZED:
-      throw memory_exception("not initialised!");
-    default:
-      throw memory_exception("unknown memory handle!");
-    }
-  }
-
-  /** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'
-  *
-  * This is the generic version for CPU RAM, CUDA, and OpenCL. Writes the memory in the currently active memory domain.
-  *
-  * @param dst_buffer     A smart pointer to the beginning of an allocated buffer
-  * @param dst_offset     Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
-  * @param bytes_to_write Number of bytes to be written
-  * @param ptr            Pointer to the first byte to be written
-  * @param async              Whether the operation should be asynchronous
-  */
-  inline void memory_write(mem_handle & dst_buffer,
-                           vcl_size_t dst_offset,
-                           vcl_size_t bytes_to_write,
-                           const void * ptr,
-                           bool async = false)
-  {
-    if (bytes_to_write > 0)
-    {
-      switch (dst_buffer.get_active_handle_id())
-      {
-      case MAIN_MEMORY:
-        cpu_ram::memory_write(dst_buffer.ram_handle(), dst_offset, bytes_to_write, ptr, async);
-        break;
-#ifdef VIENNACL_WITH_OPENCL
-      case OPENCL_MEMORY:
-        opencl::memory_write(dst_buffer.opencl_handle(), dst_offset, bytes_to_write, ptr, async);
-        break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      case CUDA_MEMORY:
-        cuda::memory_write(dst_buffer.cuda_handle(), dst_offset, bytes_to_write, ptr, async);
-        break;
-#endif
-      case MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("unknown memory handle!");
-      }
-    }
-  }
-
-  /** @brief Reads data from a buffer back to main RAM.
-  *
-  * This is the generic version for CPU RAM, CUDA, and OpenCL. Reads the memory from the currently active memory domain.
-  *
-  * @param src_buffer         A smart pointer to the beginning of an allocated source buffer
-  * @param src_offset         Offset of the first byte to be read from the beginning of src_buffer (in bytes_
-  * @param bytes_to_read      Number of bytes to be read
-  * @param ptr                Location in main RAM where to read data should be written to
-  * @param async              Whether the operation should be asynchronous
-  */
-  inline void memory_read(mem_handle const & src_buffer,
-                          vcl_size_t src_offset,
-                          vcl_size_t bytes_to_read,
-                          void * ptr,
-                          bool async = false)
-  {
-    //finish(); //Fixes some issues with AMD APP SDK. However, might sacrifice a few percents of performance in some cases.
-
-    if (bytes_to_read > 0)
-    {
-      switch (src_buffer.get_active_handle_id())
-      {
-      case MAIN_MEMORY:
-        cpu_ram::memory_read(src_buffer.ram_handle(), src_offset, bytes_to_read, ptr, async);
-        break;
-#ifdef VIENNACL_WITH_OPENCL
-      case OPENCL_MEMORY:
-        opencl::memory_read(src_buffer.opencl_handle(), src_offset, bytes_to_read, ptr, async);
-        break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      case CUDA_MEMORY:
-        cuda::memory_read(src_buffer.cuda_handle(), src_offset, bytes_to_read, ptr, async);
-        break;
-#endif
-      case MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("unknown memory handle!");
-      }
-    }
-  }
-
-
-
-  namespace detail
-  {
-    template<typename T>
-    vcl_size_t element_size(memory_types /* mem_type */)
-    {
-      return sizeof(T);
-    }
-
-
-    template<>
-    inline vcl_size_t element_size<unsigned long>(memory_types
-                                            #ifdef VIENNACL_WITH_OPENCL
-                                                  mem_type  //in order to compile cleanly at -Wextra in GCC
-                                            #endif
-                                                  )
-    {
-#ifdef VIENNACL_WITH_OPENCL
-      if (mem_type == OPENCL_MEMORY)
-        return sizeof(cl_ulong);
-#endif
-      return sizeof(unsigned long);
-    }
-
-    template<>
-    inline vcl_size_t element_size<long>(memory_types
-                                   #ifdef VIENNACL_WITH_OPENCL
-                                         mem_type  //in order to compile cleanly at -Wextra in GCC
-                                   #endif
-                                         )
-    {
-#ifdef VIENNACL_WITH_OPENCL
-      if (mem_type == OPENCL_MEMORY)
-        return sizeof(cl_long);
-#endif
-      return sizeof(long);
-    }
-
-
-    template<>
-    inline vcl_size_t element_size<unsigned int>(memory_types
-                                           #ifdef VIENNACL_WITH_OPENCL
-                                                 mem_type  //in order to compile cleanly at -Wextra in GCC
-                                           #endif
-                                                 )
-    {
-#ifdef VIENNACL_WITH_OPENCL
-      if (mem_type == OPENCL_MEMORY)
-        return sizeof(cl_uint);
-#endif
-      return sizeof(unsigned int);
-    }
-
-    template<>
-    inline vcl_size_t element_size<int>(memory_types
-                                  #ifdef VIENNACL_WITH_OPENCL
-                                        mem_type  //in order to compile cleanly at -Wextra in GCC
-                                  #endif
-                                        )
-    {
-#ifdef VIENNACL_WITH_OPENCL
-      if (mem_type == OPENCL_MEMORY)
-        return sizeof(cl_int);
-#endif
-      return sizeof(int);
-    }
-
-
-  }
-
-
-  /** @brief Switches the active memory domain within a memory handle. Data is copied if the new active domain differs from the old one. Memory in the source handle is not free'd. */
-  template<typename DataType>
-  void switch_memory_context(mem_handle & handle, viennacl::context new_ctx)
-  {
-    if (handle.get_active_handle_id() == new_ctx.memory_type())
-      return;
-
-    if (handle.get_active_handle_id() == viennacl::MEMORY_NOT_INITIALIZED || handle.raw_size() == 0)
-    {
-      handle.switch_active_handle_id(new_ctx.memory_type());
-#ifdef VIENNACL_WITH_OPENCL
-      if (new_ctx.memory_type() == OPENCL_MEMORY)
-        handle.opencl_handle().context(new_ctx.opencl_context());
-#endif
-      return;
-    }
-
-    vcl_size_t size_dst = detail::element_size<DataType>(handle.get_active_handle_id());
-    vcl_size_t size_src = detail::element_size<DataType>(new_ctx.memory_type());
-
-    if (size_dst != size_src)  // OpenCL data element size not the same as host data element size
-    {
-      throw memory_exception("Heterogeneous data element sizes not yet supported!");
-    }
-    else //no data conversion required
-    {
-      if (handle.get_active_handle_id() == MAIN_MEMORY) //we can access the existing data directly
-      {
-        switch (new_ctx.memory_type())
-        {
-#ifdef VIENNACL_WITH_OPENCL
-        case OPENCL_MEMORY:
-          handle.opencl_handle().context(new_ctx.opencl_context());
-          handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), handle.raw_size(), handle.ram_handle().get());
-          break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-        case CUDA_MEMORY:
-          handle.cuda_handle() = cuda::memory_create(handle.raw_size(), handle.ram_handle().get());
-          break;
-#endif
-        case MAIN_MEMORY:
-        default:
-          throw memory_exception("Invalid destination domain");
-        }
-      }
-#ifdef VIENNACL_WITH_OPENCL
-      else if (handle.get_active_handle_id() == OPENCL_MEMORY) // data can be dumped into destination directly
-      {
-        std::vector<DataType> buffer;
-
-        switch (new_ctx.memory_type())
-        {
-        case MAIN_MEMORY:
-          handle.ram_handle() = cpu_ram::memory_create(handle.raw_size());
-          opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), handle.ram_handle().get());
-          break;
-#ifdef VIENNACL_WITH_CUDA
-        case CUDA_MEMORY:
-          buffer.resize(handle.raw_size() / sizeof(DataType));
-          opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), &(buffer[0]));
-          cuda::memory_create(handle.cuda_handle(), handle.raw_size(), &(buffer[0]));
-          break;
-#endif
-        default:
-          throw memory_exception("Invalid destination domain");
-        }
-      }
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      else //CUDA_MEMORY
-      {
-        std::vector<DataType> buffer;
-
-        // write
-        switch (new_ctx.memory_type())
-        {
-        case MAIN_MEMORY:
-          handle.ram_handle() = cpu_ram::memory_create(handle.raw_size());
-          cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), handle.ram_handle().get());
-          break;
-#ifdef VIENNACL_WITH_OPENCL
-        case OPENCL_MEMORY:
-          buffer.resize(handle.raw_size() / sizeof(DataType));
-          cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), &(buffer[0]));
-          handle.opencl_handle() = opencl::memory_create(handle.raw_size(), &(buffer[0]));
-          break;
-#endif
-        default:
-          throw memory_exception("Unsupported source memory domain");
-        }
-      }
-#endif
-
-      // everything succeeded so far, now switch to new domain:
-      handle.switch_active_handle_id(new_ctx.memory_type());
-
-    } // no data conversion
-  }
-
-
-
-  /** @brief Copies data of the provided 'DataType' from 'handle_src' to 'handle_dst' and converts the data if the binary representation of 'DataType' among the memory domains differs. */
-  template<typename DataType>
-  void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst)
-  {
-    if (handle_dst.get_active_handle_id() == MEMORY_NOT_INITIALIZED)
-      handle_dst.switch_active_handle_id(default_memory_type());
-
-    vcl_size_t element_size_src = detail::element_size<DataType>(handle_src.get_active_handle_id());
-    vcl_size_t element_size_dst = detail::element_size<DataType>(handle_dst.get_active_handle_id());
-
-    if (element_size_src != element_size_dst)
-    {
-      // Data needs to be converted.
-
-      typesafe_host_array<DataType> buffer_src(handle_src);
-      typesafe_host_array<DataType> buffer_dst(handle_dst, handle_src.raw_size() / element_size_src);
-
-      //
-      // Step 1: Fill buffer_dst depending on where the data resides:
-      //
-      DataType const * src_data;
-      switch (handle_src.get_active_handle_id())
-      {
-      case MAIN_MEMORY:
-        src_data = reinterpret_cast<DataType const *>(handle_src.ram_handle().get());
-        for (vcl_size_t i=0; i<buffer_dst.size(); ++i)
-          buffer_dst.set(i, src_data[i]);
-        break;
-
-#ifdef VIENNACL_WITH_OPENCL
-      case OPENCL_MEMORY:
-        buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src);
-        opencl::memory_read(handle_src.opencl_handle(), 0, buffer_src.raw_size(), buffer_src.get());
-        for (vcl_size_t i=0; i<buffer_dst.size(); ++i)
-          buffer_dst.set(i, buffer_src[i]);
-        break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      case CUDA_MEMORY:
-        buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src);
-        cuda::memory_read(handle_src.cuda_handle(), 0, buffer_src.raw_size(), buffer_src.get());
-        for (vcl_size_t i=0; i<buffer_dst.size(); ++i)
-          buffer_dst.set(i, buffer_src[i]);
-        break;
-#endif
-
-      default:
-        throw memory_exception("unsupported memory domain");
-      }
-
-      //
-      // Step 2: Write to destination
-      //
-      if (handle_dst.raw_size() == buffer_dst.raw_size())
-        viennacl::backend::memory_write(handle_dst, 0, buffer_dst.raw_size(), buffer_dst.get());
-      else
-        viennacl::backend::memory_create(handle_dst, buffer_dst.raw_size(), viennacl::traits::context(handle_dst), buffer_dst.get());
-
-    }
-    else
-    {
-      // No data conversion required.
-      typesafe_host_array<DataType> buffer(handle_src);
-
-      switch (handle_src.get_active_handle_id())
-      {
-      case MAIN_MEMORY:
-        switch (handle_dst.get_active_handle_id())
-        {
-        case MAIN_MEMORY:
-        case OPENCL_MEMORY:
-        case CUDA_MEMORY:
-          if (handle_dst.raw_size() == handle_src.raw_size())
-            viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), handle_src.ram_handle().get());
-          else
-            viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst), handle_src.ram_handle().get());
-          break;
-
-        default:
-          throw memory_exception("unsupported destination memory domain");
-        }
-        break;
-
-      case OPENCL_MEMORY:
-        switch (handle_dst.get_active_handle_id())
-        {
-        case MAIN_MEMORY:
-          if (handle_dst.raw_size() != handle_src.raw_size())
-            viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
-          viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get());
-          break;
-
-        case OPENCL_MEMORY:
-          if (handle_dst.raw_size() != handle_src.raw_size())
-            viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
-          viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size());
-          break;
-
-        case CUDA_MEMORY:
-          if (handle_dst.raw_size() != handle_src.raw_size())
-            viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
-          buffer.resize(handle_src, handle_src.raw_size() / element_size_src);
-          viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get());
-          viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get());
-          break;
-
-        default:
-          throw memory_exception("unsupported destination memory domain");
-        }
-        break;
-
-      case CUDA_MEMORY:
-        switch (handle_dst.get_active_handle_id())
-        {
-        case MAIN_MEMORY:
-          if (handle_dst.raw_size() != handle_src.raw_size())
-            viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
-          viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get());
-          break;
-
-        case OPENCL_MEMORY:
-          if (handle_dst.raw_size() != handle_src.raw_size())
-            viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
-          buffer.resize(handle_src, handle_src.raw_size() / element_size_src);
-          viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get());
-          viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get());
-          break;
-
-        case CUDA_MEMORY:
-          if (handle_dst.raw_size() != handle_src.raw_size())
-            viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
-          viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size());
-          break;
-
-        default:
-          throw memory_exception("unsupported destination memory domain");
-        }
-        break;
-
-      default:
-        throw memory_exception("unsupported source memory domain");
-      }
-
-    }
-  }
-
-
-} //backend
-
-//
-// Convenience layer:
-//
-/** @brief Generic convenience routine for migrating data of an object to a new memory domain */
-template<typename T>
-void switch_memory_context(T & obj, viennacl::context new_ctx)
-{
-  obj.switch_memory_context(new_ctx);
-}
-
-} //viennacl
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp
deleted file mode 100644
index a8be55a..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp
+++ /dev/null
@@ -1,151 +0,0 @@
-#ifndef VIENNACL_BACKEND_OPENCL_HPP_
-#define VIENNACL_BACKEND_OPENCL_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/backend/opencl.hpp
-    @brief Implementations for the OpenCL backend functionality
-*/
-
-
-#include <vector>
-#include "viennacl/ocl/handle.hpp"
-#include "viennacl/ocl/backend.hpp"
-
-namespace viennacl
-{
-namespace backend
-{
-namespace opencl
-{
-
-// Requirements for backend:
-
-// * memory_create(size, host_ptr)
-// * memory_copy(src, dest, offset_src, offset_dest, size)
-// * memory_write_from_main_memory(src, offset, size,
-//                                 dest, offset, size)
-// * memory_read_to_main_memory(src, offset, size
-//                              dest, offset, size)
-// *
-//
-
-/** @brief Creates an array of the specified size in the current OpenCL context. If the second argument is provided, the buffer is initialized with data from that pointer.
- *
- * @param size_in_bytes   Number of bytes to allocate
- * @param host_ptr        Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
- * @param ctx             Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host)
- *
- */
-inline cl_mem memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL)
-{
-  //std::cout << "Creating buffer (" << size_in_bytes << " bytes) host buffer " << host_ptr << " in context " << &ctx << std::endl;
-  return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast<unsigned int>(size_in_bytes), const_cast<void *>(host_ptr));
-}
-
-/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' in the OpenCL context to memory starting at address 'dst_buffer + dst_offset' in the same OpenCL context.
- *
- *  @param src_buffer     A smart pointer to the begin of an allocated OpenCL buffer
- *  @param dst_buffer     A smart pointer to the end of an allocated OpenCL buffer
- *  @param src_offset     Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
- *  @param dst_offset     Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
- *  @param bytes_to_copy  Number of bytes to be copied
- */
-inline void memory_copy(viennacl::ocl::handle<cl_mem> const & src_buffer,
-                        viennacl::ocl::handle<cl_mem> & dst_buffer,
-                        vcl_size_t src_offset,
-                        vcl_size_t dst_offset,
-                        vcl_size_t bytes_to_copy)
-{
-  assert( &src_buffer.context() == &dst_buffer.context() && bool("Transfer between memory buffers in different contexts not supported yet!"));
-
-  viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(src_buffer.context());
-  cl_int err = clEnqueueCopyBuffer(memory_context.get_queue().handle().get(),
-                                   src_buffer.get(),
-                                   dst_buffer.get(),
-                                   src_offset,
-                                   dst_offset,
-                                   bytes_to_copy,
-                                   0, NULL, NULL);  //events
-  VIENNACL_ERR_CHECK(err);
-}
-
-
-/** @brief Writes data from main RAM identified by 'ptr' to the OpenCL buffer identified by 'dst_buffer'
- *
- * @param dst_buffer    A smart pointer to the beginning of an allocated OpenCL buffer
- * @param dst_offset    Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
- * @param bytes_to_copy Number of bytes to be copied
- * @param ptr           Pointer to the first byte to be written
- * @param async         Whether the operation should be asynchronous
- */
-inline void memory_write(viennacl::ocl::handle<cl_mem> & dst_buffer,
-                         vcl_size_t dst_offset,
-                         vcl_size_t bytes_to_copy,
-                         const void * ptr,
-                         bool async = false)
-{
-
-  viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(dst_buffer.context());
-
-#if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE)
-  std::cout << "Writing data (" << bytes_to_copy << " bytes, offset " << dst_offset << ") to OpenCL buffer " << dst_buffer.get() << " with queue " << memory_context.get_queue().handle().get() << " from " << ptr << std::endl;
-#endif
-
-  cl_int err = clEnqueueWriteBuffer(memory_context.get_queue().handle().get(),
-                                    dst_buffer.get(),
-                                    async ? CL_FALSE : CL_TRUE,             //blocking
-                                    dst_offset,
-                                    bytes_to_copy,
-                                    ptr,
-                                    0, NULL, NULL);      //events
-  VIENNACL_ERR_CHECK(err);
-}
-
-
-/** @brief Reads data from an OpenCL buffer back to main RAM.
- *
- * @param src_buffer         A smart pointer to the beginning of an allocated OpenCL source buffer
- * @param src_offset         Offset of the first byte to be read from the beginning of src_buffer (in bytes_
- * @param bytes_to_copy      Number of bytes to be read
- * @param ptr                Location in main RAM where to read data should be written to
- * @param async         Whether the operation should be asynchronous
- */
-inline void memory_read(viennacl::ocl::handle<cl_mem> const & src_buffer,
-                        vcl_size_t src_offset,
-                        vcl_size_t bytes_to_copy,
-                        void * ptr,
-                        bool async = false)
-{
-  //std::cout << "Reading data (" << bytes_to_copy << " bytes, offset " << src_offset << ") from OpenCL buffer " << src_buffer.get() << " to " << ptr << std::endl;
-  viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(src_buffer.context());
-  cl_int err =  clEnqueueReadBuffer(memory_context.get_queue().handle().get(),
-                                    src_buffer.get(),
-                                    async ? CL_FALSE : CL_TRUE,             //blocking
-                                    src_offset,
-                                    bytes_to_copy,
-                                    ptr,
-                                    0, NULL, NULL);      //events
-  VIENNACL_ERR_CHECK(err);
-}
-
-
-}
-} //backend
-} //viennacl
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp
deleted file mode 100644
index 9aaeb2e..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp
+++ /dev/null
@@ -1,268 +0,0 @@
-#ifndef VIENNACL_BACKEND_UTIL_HPP
-#define VIENNACL_BACKEND_UTIL_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/backend/util.hpp
-    @brief Helper functionality for working with different memory domains
-*/
-
-#include <vector>
-#include <cassert>
-
-#include "viennacl/forwards.h"
-#include "viennacl/backend/mem_handle.hpp"
-
-#ifdef VIENNACL_WITH_OPENCL
-#include "viennacl/backend/opencl.hpp"
-#endif
-
-
-namespace viennacl
-{
-namespace backend
-{
-namespace detail
-{
-
-  /** @brief Helper struct for converting a type to its OpenCL pendant. */
-  template<typename T>
-  struct convert_to_opencl
-  {
-    typedef T    type;
-    enum { special = 0 };
-  };
-
-#ifdef VIENNACL_WITH_OPENCL
-  template<>
-  struct convert_to_opencl<unsigned int>
-  {
-    typedef cl_uint    type;
-    //enum { special = (sizeof(unsigned int) != sizeof(cl_uint)) };
-    enum { special = 1 };
-  };
-
-  template<>
-  struct convert_to_opencl<int>
-  {
-    typedef cl_int    type;
-    //enum { special = (sizeof(int) != sizeof(cl_int)) };
-    enum { special = 1 };
-  };
-
-
-  template<>
-  struct convert_to_opencl<unsigned long>
-  {
-    typedef cl_ulong    type;
-    //enum { special = (sizeof(unsigned long) != sizeof(cl_ulong)) };
-    enum { special = 1 };
-  };
-
-  template<>
-  struct convert_to_opencl<long>
-  {
-    typedef cl_long    type;
-    //enum { special = (sizeof(long) != sizeof(cl_long)) };
-    enum { special = 1 };
-  };
-#endif
-
-
-} //namespace detail
-
-
-/** @brief Helper class implementing an array on the host. Default case: No conversion necessary */
-template<typename T, bool special = detail::convert_to_opencl<T>::special>
-class typesafe_host_array
-{
-  typedef T                                              cpu_type;
-  typedef typename detail::convert_to_opencl<T>::type    target_type;
-
-public:
-  explicit typesafe_host_array() : bytes_buffer_(NULL), buffer_size_(0) {}
-
-  explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num)
-  {
-    resize(handle, num);
-  }
-
-  ~typesafe_host_array() { delete[] bytes_buffer_; }
-
-  //
-  // Setter and Getter
-  //
-  void * get() { return reinterpret_cast<void *>(bytes_buffer_); }
-  vcl_size_t raw_size() const { return buffer_size_; }
-  vcl_size_t element_size() const  {  return sizeof(cpu_type); }
-  vcl_size_t size() const { return buffer_size_ / element_size(); }
-  template<typename U>
-  void set(vcl_size_t index, U value)
-  {
-    reinterpret_cast<cpu_type *>(bytes_buffer_)[index] = static_cast<cpu_type>(value);
-  }
-
-  //
-  // Resize functionality
-  //
-
-  /** @brief Resize without initializing the new memory */
-  void raw_resize(mem_handle const & /*handle*/, vcl_size_t num)
-  {
-    buffer_size_ = sizeof(cpu_type) * num;
-
-    if (num > 0)
-    {
-      delete[] bytes_buffer_;
-
-      bytes_buffer_ = new char[buffer_size_];
-    }
-  }
-
-  /** @brief Resize including initialization of new memory (cf. std::vector<>) */
-  void resize(mem_handle const & handle, vcl_size_t num)
-  {
-    raw_resize(handle, num);
-
-    if (num > 0)
-    {
-      for (vcl_size_t i=0; i<buffer_size_; ++i)
-        bytes_buffer_[i] = 0;
-    }
-  }
-
-  cpu_type operator[](vcl_size_t index) const
-  {
-    assert(index < size() && bool("index out of bounds"));
-
-    return reinterpret_cast<cpu_type *>(bytes_buffer_)[index];
-  }
-
-private:
-  char * bytes_buffer_;
-  vcl_size_t buffer_size_;
-};
-
-
-
-
-/** @brief Special host array type for conversion between OpenCL types and pure CPU types */
-template<typename T>
-class typesafe_host_array<T, true>
-{
-  typedef T                                              cpu_type;
-  typedef typename detail::convert_to_opencl<T>::type    target_type;
-
-public:
-  explicit typesafe_host_array() : convert_to_opencl_( (default_memory_type() == OPENCL_MEMORY) ? true : false), bytes_buffer_(NULL), buffer_size_(0) {}
-
-  explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : convert_to_opencl_(false), bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num)
-  {
-    resize(handle, num);
-  }
-
-  ~typesafe_host_array() { delete[] bytes_buffer_; }
-
-  //
-  // Setter and Getter
-  //
-
-  template<typename U>
-  void set(vcl_size_t index, U value)
-  {
-#ifdef VIENNACL_WITH_OPENCL
-    if (convert_to_opencl_)
-      reinterpret_cast<target_type *>(bytes_buffer_)[index] = static_cast<target_type>(value);
-    else
-#endif
-      reinterpret_cast<cpu_type *>(bytes_buffer_)[index] = static_cast<cpu_type>(value);
-  }
-
-  void * get() { return reinterpret_cast<void *>(bytes_buffer_); }
-  cpu_type operator[](vcl_size_t index) const
-  {
-    assert(index < size() && bool("index out of bounds"));
-#ifdef VIENNACL_WITH_OPENCL
-    if (convert_to_opencl_)
-      return static_cast<cpu_type>(reinterpret_cast<target_type *>(bytes_buffer_)[index]);
-#endif
-    return reinterpret_cast<cpu_type *>(bytes_buffer_)[index];
-  }
-
-  vcl_size_t raw_size() const { return buffer_size_; }
-  vcl_size_t element_size() const
-  {
-#ifdef VIENNACL_WITH_OPENCL
-    if (convert_to_opencl_)
-      return sizeof(target_type);
-#endif
-    return sizeof(cpu_type);
-  }
-  vcl_size_t size() const { return buffer_size_ / element_size(); }
-
-  //
-  // Resize functionality
-  //
-
-  /** @brief Resize without initializing the new memory */
-  void raw_resize(mem_handle const & handle, vcl_size_t num)
-  {
-    buffer_size_ = sizeof(cpu_type) * num;
-    (void)handle; //silence unused variable warning if compiled without OpenCL support
-
-#ifdef VIENNACL_WITH_OPENCL
-    memory_types mem_type = handle.get_active_handle_id();
-    if (mem_type == MEMORY_NOT_INITIALIZED)
-      mem_type = default_memory_type();
-
-    if (mem_type == OPENCL_MEMORY)
-    {
-      convert_to_opencl_ = true;
-      buffer_size_ = sizeof(target_type) * num;
-    }
-#endif
-
-    if (num > 0)
-    {
-      delete[] bytes_buffer_;
-
-      bytes_buffer_ = new char[buffer_size_];
-    }
-  }
-
-  /** @brief Resize including initialization of new memory (cf. std::vector<>) */
-  void resize(mem_handle const & handle, vcl_size_t num)
-  {
-    raw_resize(handle, num);
-
-    if (num > 0)
-    {
-      for (vcl_size_t i=0; i<buffer_size_; ++i)
-        bytes_buffer_[i] = 0;
-    }
-  }
-
-private:
-  bool convert_to_opencl_;
-  char * bytes_buffer_;
-  vcl_size_t buffer_size_;
-};
-
-} //backend
-} //viennacl
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp
deleted file mode 100644
index 1ee13d5..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp
+++ /dev/null
@@ -1,359 +0,0 @@
-#ifndef VIENNACL_CIRCULANT_MATRIX_HPP
-#define VIENNACL_CIRCULANT_MATRIX_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file circulant_matrix.hpp
-    @brief Implementation of the circulant_matrix class for efficient manipulation of circulant matrices.  Experimental.
-*/
-
-#include "viennacl/forwards.h"
-#include "viennacl/vector.hpp"
-#include "viennacl/ocl/backend.hpp"
-
-#include "viennacl/linalg/circulant_matrix_operations.hpp"
-
-#include "viennacl/fft.hpp"
-
-namespace viennacl
-{
-/** @brief A Circulant matrix class
-  *
-  * @tparam NumericT  The underlying scalar type (either float or double)
-  * @tparam AlignmentV   The internal memory size is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory.
-  */
-template<class NumericT, unsigned int AlignmentV>
-class circulant_matrix
-{
-public:
-  typedef viennacl::backend::mem_handle                                                              handle_type;
-  typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType>   value_type;
-
-  /**
-    * @brief The default constructor. Does not allocate any memory.
-    *
-    */
-  explicit circulant_matrix() {}
-
-  /**
-    * @brief         Creates the matrix with the given size
-    *
-    * @param rows      Number of rows of the matrix
-    * @param cols      Number of columns of the matrix
-    */
-  explicit circulant_matrix(vcl_size_t rows, vcl_size_t cols) : elements_(rows)
-  {
-    assert(rows == cols && bool("Circulant matrix must be square!"));
-    (void)cols;  // avoid 'unused parameter' warning in optimized builds
-  }
-
-  /** @brief Resizes the matrix.
-   *   Existing entries can be preserved
-   *
-   * @param sz         New size of matrix
-   * @param preserve   If true, existing values are preserved.
-   */
-  void resize(vcl_size_t sz, bool preserve = true)
-  {
-    elements_.resize(sz, preserve);
-  }
-
-  /** @brief Returns the OpenCL handle
-   *
-   *   @return OpenCL handle
-   */
-  handle_type const & handle() const { return elements_.handle(); }
-
-  /**
-    * @brief Returns an internal viennacl::vector, which represents a circulant matrix elements
-    *
-    */
-  viennacl::vector<NumericT, AlignmentV> & elements() { return elements_; }
-  viennacl::vector<NumericT, AlignmentV> const & elements() const { return elements_; }
-
-  /**
-    * @brief Returns the number of rows of the matrix
-    */
-  vcl_size_t size1() const { return elements_.size(); }
-
-  /**
-    * @brief Returns the number of columns of the matrix
-    */
-  vcl_size_t size2() const { return elements_.size(); }
-
-  /** @brief Returns the internal size of matrix representtion.
-   *   Usually required for launching OpenCL kernels only
-   *
-   *   @return Internal size of matrix representation
-   */
-  vcl_size_t internal_size() const { return elements_.internal_size(); }
-
-  /**
-    * @brief Read-write access to a single element of the matrix
-    *
-    * @param row_index  Row index of accessed element
-    * @param col_index  Column index of accessed element
-    * @return Proxy for matrix entry
-    */
-  entry_proxy<NumericT> operator()(vcl_size_t row_index, vcl_size_t col_index)
-  {
-    long index = static_cast<long>(row_index) - static_cast<long>(col_index);
-
-    assert(row_index < size1() && col_index < size2() && bool("Invalid access"));
-
-    while (index < 0)
-      index += static_cast<long>(size1());
-    return elements_[static_cast<vcl_size_t>(index)];
-  }
-
-  /**
-    * @brief += operation for circulant matrices
-    *
-    * @param that Matrix which will be added
-    * @return Result of addition
-    */
-  circulant_matrix<NumericT, AlignmentV>& operator +=(circulant_matrix<NumericT, AlignmentV>& that)
-  {
-    elements_ += that.elements();
-    return *this;
-  }
-
-private:
-  circulant_matrix(circulant_matrix const &) {}
-  circulant_matrix & operator=(circulant_matrix const & t);
-
-  viennacl::vector<NumericT, AlignmentV> elements_;
-};
-
-/** @brief Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU)
-  *
-  *
-  * @param cpu_vec   A std::vector on the host.
-  * @param gpu_mat   A circulant_matrix from ViennaCL
-  */
-template<typename NumericT, unsigned int AlignmentV>
-void copy(std::vector<NumericT>& cpu_vec, circulant_matrix<NumericT, AlignmentV>& gpu_mat)
-{
-  assert( (gpu_mat.size1() == 0 || cpu_vec.size() == gpu_mat.size1()) && bool("Size mismatch"));
-  copy(cpu_vec, gpu_mat.elements());
-}
-
-/** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the std::vector
-  *
-  *
-  * @param gpu_mat   A circulant_matrix from ViennaCL
-  * @param cpu_vec   A std::vector on the host.
-  */
-template<typename NumericT, unsigned int AlignmentV>
-void copy(circulant_matrix<NumericT, AlignmentV>& gpu_mat, std::vector<NumericT>& cpu_vec)
-{
-  assert(cpu_vec.size() == gpu_mat.size1() && bool("Size mismatch"));
-  copy(gpu_mat.elements(), cpu_vec);
-}
-
-/** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the matrix-like object
-  *
-  *
-  * @param circ_src   A circulant_matrix from ViennaCL
-  * @param com_dst   A matrix-like object
-  */
-template<typename NumericT, unsigned int AlignmentV, typename MatrixT>
-void copy(circulant_matrix<NumericT, AlignmentV>& circ_src, MatrixT& com_dst)
-{
-  vcl_size_t size = circ_src.size1();
-  assert(size == viennacl::traits::size1(com_dst) && bool("Size mismatch"));
-  assert(size == viennacl::traits::size2(com_dst) && bool("Size mismatch"));
-  std::vector<NumericT> tmp(size);
-  copy(circ_src, tmp);
-
-  for (vcl_size_t i = 0; i < size; i++)
-  {
-    for (vcl_size_t j = 0; j < size; j++)
-    {
-      long index = static_cast<long>(i) - static_cast<long>(j);
-      if (index < 0)
-        index += static_cast<long>(size);
-      com_dst(i, j) = tmp[static_cast<vcl_size_t>(index)];
-    }
-  }
-}
-
-/** @brief Copies a the matrix-like object to the circulant matrix from the OpenCL device (either GPU or multi-core CPU)
-  *
-  *
-  * @param com_src   A std::vector on the host
-  * @param circ_dst   A circulant_matrix from ViennaCL
-  */
-template<typename NumericT, unsigned int AlignmentV, typename MatrixT>
-void copy(MatrixT& com_src, circulant_matrix<NumericT, AlignmentV>& circ_dst)
-{
-  assert( (circ_dst.size1() == 0 || circ_dst.size1() == viennacl::traits::size1(com_src)) && bool("Size mismatch"));
-  assert( (circ_dst.size2() == 0 || circ_dst.size2() == viennacl::traits::size2(com_src)) && bool("Size mismatch"));
-
-  vcl_size_t size = viennacl::traits::size1(com_src);
-
-  std::vector<NumericT> tmp(size);
-
-  for (vcl_size_t i = 0; i < size; i++) tmp[i] = com_src(i, 0);
-
-  copy(tmp, circ_dst);
-}
-
-/*namespace linalg
-  {
-    template<typename NumericT, unsigned int AlignmentV, unsigned int VECTOR_AlignmentV>
-    void prod_impl(circulant_matrix<NumericT, AlignmentV> const & mat,
-                    vector<NumericT, VECTOR_AlignmentV> const & vec,
-                    vector<NumericT, VECTOR_AlignmentV>& result) {
-        viennacl::vector<NumericT, VECTOR_AlignmentV> circ(mat.elements().size() * 2);
-        fft::real_to_complex(mat.elements(), circ, mat.elements().size());
-
-        viennacl::vector<NumericT, VECTOR_AlignmentV> tmp(vec.size() * 2);
-        viennacl::vector<NumericT, VECTOR_AlignmentV> tmp2(vec.size() * 2);
-
-        fft::real_to_complex(vec, tmp, vec.size());
-        fft::convolve(circ, tmp, tmp2);
-        fft::complex_to_real(tmp2, result, vec.size());
-    }
-  }*/
-
-/** @brief Prints the matrix. Output is compatible to boost::numeric::ublas
-  *
-  * @param s            STL output stream
-  * @param gpu_matrix   A ViennaCL circulant matrix
-  */
-template<class NumericT, unsigned int AlignmentV>
-std::ostream & operator<<(std::ostream& s, circulant_matrix<NumericT, AlignmentV>& gpu_matrix)
-{
-  vcl_size_t size = gpu_matrix.size1();
-  std::vector<NumericT> tmp(size);
-  copy(gpu_matrix, tmp);
-  s << "[" << size << "," << size << "](";
-
-  for (vcl_size_t i = 0; i < size; i++)
-  {
-    s << "(";
-    for (vcl_size_t j = 0; j < size; j++)
-    {
-      long index = static_cast<long>(i) - static_cast<long>(j);
-      if (index < 0) index = static_cast<long>(size) + index;
-      s << tmp[vcl_size_t(index)];
-      //s << index;
-      if (j < (size - 1)) s << ",";
-    }
-    s << ")";
-  }
-  s << ")";
-  return s;
-}
-
-//
-// Specify available operations:
-//
-
-/** \cond */
-
-namespace linalg
-{
-namespace detail
-{
-  // x = A * y
-  template<typename T, unsigned int A>
-  struct op_executor<vector_base<T>, op_assign, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> >
-  {
-    static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
-    {
-      // check for the special case x = A * x
-      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
-      {
-        viennacl::vector<T> temp(lhs);
-        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
-        lhs = temp;
-      }
-      else
-        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
-    }
-  };
-
-  template<typename T, unsigned int A>
-  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> >
-  {
-    static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
-    {
-      viennacl::vector<T> temp(lhs);
-      viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
-      lhs += temp;
-    }
-  };
-
-  template<typename T, unsigned int A>
-  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> >
-  {
-    static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
-    {
-      viennacl::vector<T> temp(lhs);
-      viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
-      lhs -= temp;
-    }
-  };
-
-
-  // x = A * vec_op
-  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
-  struct op_executor<vector_base<T>, op_assign, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
-  {
-    static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
-    {
-      viennacl::vector<T> temp(rhs.rhs());
-      viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
-    }
-  };
-
-  // x = A * vec_op
-  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
-  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const circulant_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> >
-  {
-    static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
-    {
-      viennacl::vector<T> temp(rhs.rhs());
-      viennacl::vector<T> temp_result(lhs);
-      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
-      lhs += temp_result;
-    }
-  };
-
-  // x = A * vec_op
-  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
-  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
-  {
-    static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
-    {
-      viennacl::vector<T> temp(rhs.rhs());
-      viennacl::vector<T> temp_result(lhs);
-      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
-      lhs -= temp_result;
-    }
-  };
-
-} // namespace detail
-} // namespace linalg
-
-/** \endcond */
-}
-
-#endif // VIENNACL_CIRCULANT_MATRIX_HPP