You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2016/06/08 21:40:44 UTC
[47/51] [partial] mahout git commit: (nojira) add native-viennaCL
module to codebase. closes apache/mahout#241
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp
new file mode 100644
index 0000000..e463e88
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp
@@ -0,0 +1,101 @@
+/* =========================================================================
+ Copyright (c) 2010-2014, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the PDF manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl.hpp"
+#include "viennacl/backend/mem_handle.hpp"
+
+
+
+static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A)
+{
+#ifdef VIENNACL_WITH_CUDA
+ h.switch_active_handle_id(viennacl::CUDA_MEMORY);
+ h.cuda_handle().reset(A->cuda_mem);
+ h.cuda_handle().inc();
+ if (A->precision == ViennaCLFloat)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness
+ else if (A->precision == ViennaCLDouble)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness
+ else
+ return ViennaCLGenericFailure;
+
+ return ViennaCLSuccess;
+#else
+ (void)h;
+ (void)A;
+ return ViennaCLGenericFailure;
+#endif
+}
+
+static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A)
+{
+#ifdef VIENNACL_WITH_OPENCL
+ h.switch_active_handle_id(viennacl::OPENCL_MEMORY);
+ h.opencl_handle() = A->opencl_mem;
+ h.opencl_handle().inc();
+ if (A->precision == ViennaCLFloat)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness
+ else if (A->precision == ViennaCLDouble)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness
+ else
+ return ViennaCLGenericFailure;
+
+ return ViennaCLSuccess;
+#else
+ (void)h;
+ (void)A;
+ return ViennaCLGenericFailure;
+#endif
+}
+
+
+static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A)
+{
+ h.switch_active_handle_id(viennacl::MAIN_MEMORY);
+ h.ram_handle().reset(A->host_mem);
+ h.ram_handle().inc();
+ if (A->precision == ViennaCLFloat)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness
+ else if (A->precision == ViennaCLDouble)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness
+ else
+ return ViennaCLGenericFailure;
+
+ return ViennaCLSuccess;
+}
+
+
+static ViennaCLStatus init_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A)
+{
+ switch (A->backend->backend_type)
+ {
+ case ViennaCLCUDA:
+ return init_cuda_matrix(h, A);
+
+ case ViennaCLOpenCL:
+ return init_opencl_matrix(h, A);
+
+ case ViennaCLHost:
+ return init_host_matrix(h, A);
+
+ default:
+ return ViennaCLGenericFailure;
+ }
+}
+
+
+
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp
new file mode 100644
index 0000000..8be00d7
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp
@@ -0,0 +1,101 @@
+/* =========================================================================
+ Copyright (c) 2010-2014, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the PDF manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include "viennacl.hpp"
+#include "viennacl/backend/mem_handle.hpp"
+
+
+
+static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
+{
+#ifdef VIENNACL_WITH_CUDA
+ h.switch_active_handle_id(viennacl::CUDA_MEMORY);
+ h.cuda_handle().reset(x->cuda_mem);
+ h.cuda_handle().inc();
+ if (x->precision == ViennaCLFloat)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * x->size * sizeof(float)); // not necessary, but still set for conciseness
+ else if (x->precision == ViennaCLDouble)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * x->size * sizeof(double)); // not necessary, but still set for conciseness
+ else
+ return ViennaCLGenericFailure;
+
+ return ViennaCLSuccess;
+#else
+ (void)h;
+ (void)x;
+ return ViennaCLGenericFailure;
+#endif
+}
+
+static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
+{
+#ifdef VIENNACL_WITH_OPENCL
+ h.switch_active_handle_id(viennacl::OPENCL_MEMORY);
+ h.opencl_handle() = x->opencl_mem;
+ h.opencl_handle().inc();
+ if (x->precision == ViennaCLFloat)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(float)); // not necessary, but still set for conciseness
+ else if (x->precision == ViennaCLDouble)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(double)); // not necessary, but still set for conciseness
+ else
+ return ViennaCLGenericFailure;
+
+ return ViennaCLSuccess;
+#else
+ (void)h;
+ (void)x;
+ return ViennaCLGenericFailure;
+#endif
+}
+
+
+static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
+{
+ h.switch_active_handle_id(viennacl::MAIN_MEMORY);
+ h.ram_handle().reset(x->host_mem);
+ h.ram_handle().inc();
+ if (x->precision == ViennaCLFloat)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(float)); // not necessary, but still set for conciseness
+ else if (x->precision == ViennaCLDouble)
+ h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(double)); // not necessary, but still set for conciseness
+ else
+ return ViennaCLGenericFailure;
+
+ return ViennaCLSuccess;
+}
+
+
+static ViennaCLStatus init_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
+{
+ switch (x->backend->backend_type)
+ {
+ case ViennaCLCUDA:
+ return init_cuda_vector(h, x);
+
+ case ViennaCLOpenCL:
+ return init_opencl_vector(h, x);
+
+ case ViennaCLHost:
+ return init_host_vector(h, x);
+
+ default:
+ return ViennaCLGenericFailure;
+ }
+}
+
+
+
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp
new file mode 100644
index 0000000..c66c848
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp
@@ -0,0 +1,141 @@
+#ifndef VIENNACL_VIENNACL_PRIVATE_HPP
+#define VIENNACL_VIENNACL_PRIVATE_HPP
+
+
+/* =========================================================================
+ Copyright (c) 2010-2014, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the PDF manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include <stdlib.h>
+
+#ifdef VIENNACL_WITH_OPENCL
+#ifdef __APPLE__
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+#endif
+
+#include "viennacl.hpp"
+
+
+/************* Backend Management ******************/
+
+struct ViennaCLCUDABackend_impl
+{
+ //TODO: Add stream and/or device descriptors here
+};
+
+struct ViennaCLOpenCLBackend_impl
+{
+ ViennaCLInt context_id;
+};
+
+struct ViennaCLHostBackend_impl
+{
+ // Nothing to specify *at the moment*
+};
+
+
+/** @brief Generic backend for CUDA, OpenCL, host-based stuff */
+struct ViennaCLBackend_impl
+{
+ ViennaCLBackendTypes backend_type;
+
+ ViennaCLCUDABackend_impl cuda_backend;
+ ViennaCLOpenCLBackend_impl opencl_backend;
+ ViennaCLHostBackend_impl host_backend;
+};
+
+
+
+/******** User Types **********/
+
+struct ViennaCLHostScalar_impl
+{
+ ViennaCLPrecision precision;
+
+ union {
+ float value_float;
+ double value_double;
+ };
+};
+
+struct ViennaCLScalar_impl
+{
+ ViennaCLBackend backend;
+ ViennaCLPrecision precision;
+
+ // buffer:
+#ifdef VIENNACL_WITH_CUDA
+ char * cuda_mem;
+#endif
+#ifdef VIENNACL_WITH_OPENCL
+ cl_mem opencl_mem;
+#endif
+ char * host_mem;
+
+ ViennaCLInt offset;
+};
+
+struct ViennaCLVector_impl
+{
+ ViennaCLBackend backend;
+ ViennaCLPrecision precision;
+
+ // buffer:
+#ifdef VIENNACL_WITH_CUDA
+ char * cuda_mem;
+#endif
+#ifdef VIENNACL_WITH_OPENCL
+ cl_mem opencl_mem;
+#endif
+ char * host_mem;
+
+ ViennaCLInt offset;
+ ViennaCLInt inc;
+ ViennaCLInt size;
+};
+
+struct ViennaCLMatrix_impl
+{
+ ViennaCLBackend backend;
+ ViennaCLPrecision precision;
+ ViennaCLOrder order;
+ ViennaCLTranspose trans;
+
+ // buffer:
+#ifdef VIENNACL_WITH_CUDA
+ char * cuda_mem;
+#endif
+#ifdef VIENNACL_WITH_OPENCL
+ cl_mem opencl_mem;
+#endif
+ char * host_mem;
+
+ ViennaCLInt size1;
+ ViennaCLInt start1;
+ ViennaCLInt stride1;
+ ViennaCLInt internal_size1;
+
+ ViennaCLInt size2;
+ ViennaCLInt start2;
+ ViennaCLInt stride2;
+ ViennaCLInt internal_size2;
+};
+
+
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp
new file mode 100644
index 0000000..ccfd035
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp
@@ -0,0 +1,171 @@
+#ifndef VIENNACL_BACKEND_CPU_RAM_HPP_
+#define VIENNACL_BACKEND_CPU_RAM_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/backend/cpu_ram.hpp
+ @brief Implementations for the OpenCL backend functionality
+*/
+
+#include <cassert>
+#include <vector>
+#ifdef VIENNACL_WITH_AVX2
+#include <stdlib.h>
+#endif
+
+#include "viennacl/forwards.h"
+#include "viennacl/tools/shared_ptr.hpp"
+
+namespace viennacl
+{
+namespace backend
+{
+namespace cpu_ram
+{
+typedef viennacl::tools::shared_ptr<char> handle_type;
+// Requirements for backend:
+
+// * memory_create(size, host_ptr)
+// * memory_copy(src, dest, offset_src, offset_dest, size)
+// * memory_write_from_main_memory(src, offset, size,
+// dest, offset, size)
+// * memory_read_to_main_memory(src, offset, size
+// dest, offset, size)
+// *
+//
+
+namespace detail
+{
+ /** @brief Helper struct for deleting an pointer to an array */
+ template<class U>
+ struct array_deleter
+ {
+#ifdef VIENNACL_WITH_AVX2
+ void operator()(U* p) const { free(p); }
+#else
+ void operator()(U* p) const { delete[] p; }
+#endif
+ };
+
+}
+
+/** @brief Creates an array of the specified size in main RAM. If the second argument is provided, the buffer is initialized with data from that pointer.
+ *
+ * @param size_in_bytes Number of bytes to allocate
+ * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
+ *
+ */
+inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
+{
+#ifdef VIENNACL_WITH_AVX2
+ // Note: aligned_alloc not available on all compilers. Consider platform-specific alternatives such as posix_memalign()
+ if (!host_ptr)
+ return handle_type(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>());
+
+ handle_type new_handle(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>());
+#else
+ if (!host_ptr)
+ return handle_type(new char[size_in_bytes], detail::array_deleter<char>());
+
+ handle_type new_handle(new char[size_in_bytes], detail::array_deleter<char>());
+#endif
+
+ // copy data:
+ char * raw_ptr = new_handle.get();
+ const char * data_ptr = static_cast<const char *>(host_ptr);
+#ifdef VIENNACL_WITH_OPENMP
+ #pragma omp parallel for
+#endif
+ for (long i=0; i<long(size_in_bytes); ++i)
+ raw_ptr[i] = data_ptr[i];
+
+ return new_handle;
+}
+
+/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'.
+ *
+ * @param src_buffer A smart pointer to the begin of an allocated buffer
+ * @param dst_buffer A smart pointer to the end of an allocated buffer
+ * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
+ * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
+ * @param bytes_to_copy Number of bytes to be copied
+ */
+inline void memory_copy(handle_type const & src_buffer,
+ handle_type & dst_buffer,
+ vcl_size_t src_offset,
+ vcl_size_t dst_offset,
+ vcl_size_t bytes_to_copy)
+{
+ assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
+ assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
+
+#ifdef VIENNACL_WITH_OPENMP
+ #pragma omp parallel for
+#endif
+ for (long i=0; i<long(bytes_to_copy); ++i)
+ dst_buffer.get()[vcl_size_t(i)+dst_offset] = src_buffer.get()[vcl_size_t(i) + src_offset];
+}
+
+/** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'
+ *
+ * @param dst_buffer A smart pointer to the beginning of an allocated buffer
+ * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
+ * @param bytes_to_copy Number of bytes to be copied
+ * @param ptr Pointer to the first byte to be written
+ */
+inline void memory_write(handle_type & dst_buffer,
+ vcl_size_t dst_offset,
+ vcl_size_t bytes_to_copy,
+ const void * ptr,
+ bool /*async*/)
+{
+ assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
+
+#ifdef VIENNACL_WITH_OPENMP
+ #pragma omp parallel for
+#endif
+ for (long i=0; i<long(bytes_to_copy); ++i)
+ dst_buffer.get()[vcl_size_t(i)+dst_offset] = static_cast<const char *>(ptr)[i];
+}
+
+/** @brief Reads data from a buffer back to main RAM.
+ *
+ * @param src_buffer A smart pointer to the beginning of an allocated source buffer
+ * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_
+ * @param bytes_to_copy Number of bytes to be read
+ * @param ptr Location in main RAM where to read data should be written to
+ */
+inline void memory_read(handle_type const & src_buffer,
+ vcl_size_t src_offset,
+ vcl_size_t bytes_to_copy,
+ void * ptr,
+ bool /*async*/)
+{
+ assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
+
+#ifdef VIENNACL_WITH_OPENMP
+ #pragma omp parallel for
+#endif
+ for (long i=0; i<long(bytes_to_copy); ++i)
+ static_cast<char *>(ptr)[i] = src_buffer.get()[vcl_size_t(i)+src_offset];
+}
+
+}
+} //backend
+} //viennacl
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp
new file mode 100644
index 0000000..641bfea
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp
@@ -0,0 +1,206 @@
+#ifndef VIENNACL_BACKEND_CUDA_HPP_
+#define VIENNACL_BACKEND_CUDA_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/backend/cuda.hpp
+ @brief Implementations for the CUDA backend functionality
+*/
+
+
+#include <iostream>
+#include <vector>
+#include <cassert>
+#include <stdexcept>
+#include <sstream>
+
+#include "viennacl/forwards.h"
+#include "viennacl/tools/shared_ptr.hpp"
+
+// includes CUDA
+#include <cuda_runtime.h>
+
+#define VIENNACL_CUDA_ERROR_CHECK(err) detail::cuda_error_check (err, __FILE__, __LINE__)
+
+namespace viennacl
+{
+namespace backend
+{
+namespace cuda
+{
+
+typedef viennacl::tools::shared_ptr<char> handle_type;
+// Requirements for backend:
+
+// * memory_create(size, host_ptr)
+// * memory_copy(src, dest, offset_src, offset_dest, size)
+// * memory_write_from_main_memory(src, offset, size,
+// dest, offset, size)
+// * memory_read_to_main_memory(src, offset, size
+// dest, offset, size)
+// *
+//
+
+class cuda_exception : public std::runtime_error
+{
+public:
+ cuda_exception(std::string const & what_arg, cudaError_t err_code) : std::runtime_error(what_arg), error_code_(err_code) {}
+
+ cudaError_t error_code() const { return error_code_; }
+
+private:
+ cudaError_t error_code_;
+};
+
+namespace detail
+{
+
+ inline void cuda_error_check(cudaError error_code, const char *file, const int line )
+ {
+ if (cudaSuccess != error_code)
+ {
+ std::stringstream ss;
+ ss << file << "(" << line << "): " << ": CUDA Runtime API error " << error_code << ": " << cudaGetErrorString( error_code ) << std::endl;
+ throw viennacl::backend::cuda::cuda_exception(ss.str(), error_code);
+ }
+ }
+
+
+ /** @brief Functor for deleting a CUDA handle. Used within the smart pointer class. */
+ template<typename U>
+ struct cuda_deleter
+ {
+ void operator()(U * p) const
+ {
+ //std::cout << "Freeing handle " << reinterpret_cast<void *>(p) << std::endl;
+ cudaFree(p);
+ }
+ };
+
+}
+
+/** @brief Creates an array of the specified size on the CUDA device. If the second argument is provided, the buffer is initialized with data from that pointer.
+ *
+ * @param size_in_bytes Number of bytes to allocate
+ * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
+ *
+ */
+inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
+{
+ void * dev_ptr = NULL;
+ VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) );
+ //std::cout << "Allocated new dev_ptr " << dev_ptr << " of size " << size_in_bytes << std::endl;
+
+ if (!host_ptr)
+ return handle_type(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());
+
+ handle_type new_handle(reinterpret_cast<char*>(dev_ptr), detail::cuda_deleter<char>());
+
+ // copy data:
+ //std::cout << "Filling new handle from host_ptr " << host_ptr << std::endl;
+ cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice);
+
+ return new_handle;
+}
+
+
+/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory starting at address 'dst_buffer + dst_offset' on the same CUDA device.
+ *
+ * @param src_buffer A smart pointer to the begin of an allocated CUDA buffer
+ * @param dst_buffer A smart pointer to the end of an allocated CUDA buffer
+ * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
+ * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
+ * @param bytes_to_copy Number of bytes to be copied
+ */
+inline void memory_copy(handle_type const & src_buffer,
+ handle_type & dst_buffer,
+ vcl_size_t src_offset,
+ vcl_size_t dst_offset,
+ vcl_size_t bytes_to_copy)
+{
+ assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
+ assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
+
+ cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset),
+ reinterpret_cast<void *>(src_buffer.get() + src_offset),
+ bytes_to_copy,
+ cudaMemcpyDeviceToDevice);
+}
+
+
+/** @brief Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'
+ *
+ * @param dst_buffer A smart pointer to the beginning of an allocated CUDA buffer
+ * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
+ * @param bytes_to_copy Number of bytes to be copied
+ * @param ptr Pointer to the first byte to be written
+ * @param async Whether the operation should be asynchronous
+ */
+inline void memory_write(handle_type & dst_buffer,
+ vcl_size_t dst_offset,
+ vcl_size_t bytes_to_copy,
+ const void * ptr,
+ bool async = false)
+{
+ assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
+
+ if (async)
+ cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
+ reinterpret_cast<const char *>(ptr),
+ bytes_to_copy,
+ cudaMemcpyHostToDevice);
+ else
+ cudaMemcpy(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
+ reinterpret_cast<const char *>(ptr),
+ bytes_to_copy,
+ cudaMemcpyHostToDevice);
+}
+
+
+/** @brief Reads data from a CUDA buffer back to main RAM.
+ *
+ * @param src_buffer A smart pointer to the beginning of an allocated CUDA source buffer
+ * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_
+ * @param bytes_to_copy Number of bytes to be read
+ * @param ptr Location in main RAM where to read data should be written to
+ * @param async Whether the operation should be asynchronous
+ */
+inline void memory_read(handle_type const & src_buffer,
+ vcl_size_t src_offset,
+ vcl_size_t bytes_to_copy,
+ void * ptr,
+ bool async = false)
+{
+ assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
+
+ if (async)
+ cudaMemcpyAsync(reinterpret_cast<char *>(ptr),
+ reinterpret_cast<char *>(src_buffer.get()) + src_offset,
+ bytes_to_copy,
+ cudaMemcpyDeviceToHost);
+ else
+ cudaMemcpy(reinterpret_cast<char *>(ptr),
+ reinterpret_cast<char *>(src_buffer.get()) + src_offset,
+ bytes_to_copy,
+ cudaMemcpyDeviceToHost);
+}
+
+} //cuda
+} //backend
+} //viennacl
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp
new file mode 100644
index 0000000..37c680b
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp
@@ -0,0 +1,250 @@
+#ifndef VIENNACL_BACKEND_MEM_HANDLE_HPP
+#define VIENNACL_BACKEND_MEM_HANDLE_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/backend/mem_handle.hpp
+ @brief Implements the multi-memory-domain handle
+*/
+
+#include <vector>
+#include <cassert>
+#include "viennacl/forwards.h"
+#include "viennacl/tools/shared_ptr.hpp"
+#include "viennacl/backend/cpu_ram.hpp"
+
+#ifdef VIENNACL_WITH_OPENCL
+#include "viennacl/backend/opencl.hpp"
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+#include "viennacl/backend/cuda.hpp"
+#endif
+
+
+namespace viennacl
+{
+namespace backend
+{
+
+namespace detail
+{
+ /** @brief Singleton for managing the default memory type.
+ *
+ * @param new_mem_type If NULL, returns the current memory type. Otherwise, sets the memory type to the provided value.
+ */
+ inline memory_types get_set_default_memory_type(memory_types * new_mem_type)
+ {
+ // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default
+#ifdef VIENNACL_WITH_CUDA
+ static memory_types mem_type = CUDA_MEMORY;
+#elif defined(VIENNACL_WITH_OPENCL)
+ static memory_types mem_type = OPENCL_MEMORY;
+#else
+ static memory_types mem_type = MAIN_MEMORY;
+#endif
+
+ if (new_mem_type)
+ mem_type = *new_mem_type;
+
+ return mem_type;
+ }
+}
+
+/** @brief Returns the default memory type for the given configuration.
+ *
+ * CUDA has precedence over OpenCL, which has precedence over main memory. Depends on which VIENNACL_WITH_{CUDA/OPENCL/OPENMP} macros are defined.
+ */
+inline memory_types default_memory_type() { return detail::get_set_default_memory_type(NULL); }
+
+/** @brief Sets the default memory type for the given configuration.
+ *
+ * Make sure the respective new memory type is enabled.
+ * For example, passing CUDA_MEMORY if no CUDA backend is selected will result in exceptions being thrown as soon as you try to allocate buffers.
+ */
+inline memory_types default_memory_type(memory_types new_memory_type) { return detail::get_set_default_memory_type(&new_memory_type); }
+
+
+/** @brief Main abstraction class for multiple memory domains. Represents a buffer in either main RAM, an OpenCL context, or a CUDA device.
+ *
+ * The idea is to wrap all possible handle types inside this class so that higher-level code does not need to be cluttered with preprocessor switches.
+ * Instead, this class collects all the necessary conditional compilations.
+ *
+ */
+class mem_handle
+{
+public:
+ typedef viennacl::tools::shared_ptr<char> ram_handle_type;
+ typedef viennacl::tools::shared_ptr<char> cuda_handle_type;
+
+ /** @brief Default CTOR. No memory is allocated */
+ mem_handle() : active_handle_(MEMORY_NOT_INITIALIZED), size_in_bytes_(0) {}
+
+ /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. */
+ ram_handle_type & ram_handle() { return ram_handle_; }
+ /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. */
+ ram_handle_type const & ram_handle() const { return ram_handle_; }
+
+#ifdef VIENNACL_WITH_OPENCL
+ /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */
+ viennacl::ocl::handle<cl_mem> & opencl_handle() { return opencl_handle_; }
+ /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */
+ viennacl::ocl::handle<cl_mem> const & opencl_handle() const { return opencl_handle_; }
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+ /** @brief Returns the handle to a CUDA buffer. The handle contains NULL if no such buffer has been allocated. */
+ cuda_handle_type & cuda_handle() { return cuda_handle_; }
+ /** @brief Returns the handle to a CUDA buffer. The handle contains NULL if no such buffer has been allocated. */
+ cuda_handle_type const & cuda_handle() const { return cuda_handle_; }
+#endif
+
+ /** @brief Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no data. */
+ memory_types get_active_handle_id() const { return active_handle_; }
+
+ /** @brief Switches the currently active handle. If no support for that backend is provided, an exception is thrown. */
+ void switch_active_handle_id(memory_types new_id)
+ {
+ if (new_id != active_handle_)
+ {
+ if (active_handle_ == MEMORY_NOT_INITIALIZED)
+ active_handle_ = new_id;
+ else if (active_handle_ == MAIN_MEMORY)
+ {
+ active_handle_ = new_id;
+ }
+ else if (active_handle_ == OPENCL_MEMORY)
+ {
+#ifdef VIENNACL_WITH_OPENCL
+ active_handle_ = new_id;
+#else
+ throw memory_exception("compiled without OpenCL suppport!");
+#endif
+ }
+ else if (active_handle_ == CUDA_MEMORY)
+ {
+#ifdef VIENNACL_WITH_CUDA
+ active_handle_ = new_id;
+#else
+ throw memory_exception("compiled without CUDA suppport!");
+#endif
+ }
+ else
+ throw memory_exception("invalid new memory region!");
+ }
+ }
+
+ /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point to the same buffer. */
+ bool operator==(mem_handle const & other) const
+ {
+ if (active_handle_ != other.active_handle_)
+ return false;
+
+ switch (active_handle_)
+ {
+ case MAIN_MEMORY:
+ return ram_handle_.get() == other.ram_handle_.get();
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ return opencl_handle_.get() == other.opencl_handle_.get();
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ return cuda_handle_.get() == other.cuda_handle_.get();
+#endif
+ default: break;
+ }
+
+ return false;
+ }
+
+ /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point a buffer with inferior address
+ * useful to store handles into a map, since they naturally have strong ordering
+ */
+ bool operator<(mem_handle const & other) const
+ {
+ if (active_handle_ != other.active_handle_)
+ return false;
+
+ switch (active_handle_)
+ {
+ case MAIN_MEMORY:
+ return ram_handle_.get() < other.ram_handle_.get();
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ return opencl_handle_.get() < other.opencl_handle_.get();
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ return cuda_handle_.get() < other.cuda_handle_.get();
+#endif
+ default: break;
+ }
+
+ return false;
+ }
+
+
+ bool operator!=(mem_handle const & other) const { return !(*this == other); }
+
+ /** @brief Implements a fast swapping method. No data is copied, only the handles are exchanged. */
+ void swap(mem_handle & other)
+ {
+ // swap handle type:
+ memory_types active_handle_tmp = other.active_handle_;
+ other.active_handle_ = active_handle_;
+ active_handle_ = active_handle_tmp;
+
+ // swap ram handle:
+ ram_handle_type ram_handle_tmp = other.ram_handle_;
+ other.ram_handle_ = ram_handle_;
+ ram_handle_ = ram_handle_tmp;
+
+ // swap OpenCL handle:
+#ifdef VIENNACL_WITH_OPENCL
+ opencl_handle_.swap(other.opencl_handle_);
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ cuda_handle_type cuda_handle_tmp = other.cuda_handle_;
+ other.cuda_handle_ = cuda_handle_;
+ cuda_handle_ = cuda_handle_tmp;
+#endif
+ }
+
+ /** @brief Returns the number of bytes of the currently active buffer */
+ vcl_size_t raw_size() const { return size_in_bytes_; }
+
+ /** @brief Sets the size of the currently active buffer. Use with care! */
+ void raw_size(vcl_size_t new_size) { size_in_bytes_ = new_size; }
+
+private:
+ memory_types active_handle_;
+ ram_handle_type ram_handle_;
+#ifdef VIENNACL_WITH_OPENCL
+ viennacl::ocl::handle<cl_mem> opencl_handle_;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ cuda_handle_type cuda_handle_;
+#endif
+ vcl_size_t size_in_bytes_;
+};
+
+
+} //backend
+} //viennacl
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp
new file mode 100644
index 0000000..d6f29a5
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp
@@ -0,0 +1,628 @@
+#ifndef VIENNACL_BACKEND_MEMORY_HPP
+#define VIENNACL_BACKEND_MEMORY_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/backend/memory.hpp
+ @brief Main interface routines for memory management
+*/
+
+#include <vector>
+#include <cassert>
+#include "viennacl/forwards.h"
+#include "viennacl/backend/mem_handle.hpp"
+#include "viennacl/context.hpp"
+#include "viennacl/traits/handle.hpp"
+#include "viennacl/traits/context.hpp"
+#include "viennacl/backend/util.hpp"
+
+#include "viennacl/backend/cpu_ram.hpp"
+
+#ifdef VIENNACL_WITH_OPENCL
+#include "viennacl/backend/opencl.hpp"
+#include "viennacl/ocl/backend.hpp"
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+#include "viennacl/backend/cuda.hpp"
+#endif
+
+
+namespace viennacl
+{
+namespace backend
+{
+
+
+ // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default
+ /** @brief Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. */
+ inline void finish()
+ {
+#ifdef VIENNACL_WITH_CUDA
+ cudaDeviceSynchronize();
+#endif
+#ifdef VIENNACL_WITH_OPENCL
+ viennacl::ocl::get_queue().finish();
+#endif
+ }
+
+
+
+
+ // Requirements for backend:
+
+ // ---- Memory ----
+ //
+ // * memory_create(size, host_ptr)
+ // * memory_copy(src, dest, offset_src, offset_dest, size)
+ // * memory_write(src, offset, size, ptr)
+ // * memory_read(src, offset, size, ptr)
+ //
+
+ /** @brief Creates an array of the specified size. If the second argument is provided, the buffer is initialized with data from that pointer.
+ *
+ * This is the generic version for CPU RAM, CUDA, and OpenCL. Creates the memory in the currently active memory domain.
+ *
+ * @param handle The generic wrapper handle for multiple memory domains which will hold the new buffer.
+ * @param size_in_bytes Number of bytes to allocate
+ * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host)
+ * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
+ *
+ */
+ inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL)
+ {
+ if (size_in_bytes > 0)
+ {
+ if (handle.get_active_handle_id() == MEMORY_NOT_INITIALIZED)
+ handle.switch_active_handle_id(ctx.memory_type());
+
+ switch (handle.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ handle.ram_handle() = cpu_ram::memory_create(size_in_bytes, host_ptr);
+ handle.raw_size(size_in_bytes);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ handle.opencl_handle().context(ctx.opencl_context());
+ handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr);
+ handle.raw_size(size_in_bytes);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ handle.cuda_handle() = cuda::memory_create(size_in_bytes, host_ptr);
+ handle.raw_size(size_in_bytes);
+ break;
+#endif
+ case MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("unknown memory handle!");
+ }
+ }
+ }
+
+ /*
+ inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, const void * host_ptr = NULL)
+ {
+ viennacl::context ctx(default_memory_type());
+ memory_create(handle, size_in_bytes, ctx, host_ptr);
+ }*/
+
+
+ /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'.
+ *
+ * This is the generic version for CPU RAM, CUDA, and OpenCL. Copies the memory in the currently active memory domain.
+ *
+ *
+ * @param src_buffer A smart pointer to the begin of an allocated buffer
+ * @param dst_buffer A smart pointer to the end of an allocated buffer
+ * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
+ * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
+ * @param bytes_to_copy Number of bytes to be copied
+ */
+ inline void memory_copy(mem_handle const & src_buffer,
+ mem_handle & dst_buffer,
+ vcl_size_t src_offset,
+ vcl_size_t dst_offset,
+ vcl_size_t bytes_to_copy)
+ {
+ assert( src_buffer.get_active_handle_id() == dst_buffer.get_active_handle_id() && bool("memory_copy() must be called on buffers from the same domain") );
+
+ if (bytes_to_copy > 0)
+ {
+ switch (src_buffer.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ cpu_ram::memory_copy(src_buffer.ram_handle(), dst_buffer.ram_handle(), src_offset, dst_offset, bytes_to_copy);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ opencl::memory_copy(src_buffer.opencl_handle(), dst_buffer.opencl_handle(), src_offset, dst_offset, bytes_to_copy);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ cuda::memory_copy(src_buffer.cuda_handle(), dst_buffer.cuda_handle(), src_offset, dst_offset, bytes_to_copy);
+ break;
+#endif
+ case MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("unknown memory handle!");
+ }
+ }
+ }
+
+ // TODO: Refine this concept. Maybe move to constructor?
+ /** @brief A 'shallow' copy operation from an initialized buffer to an uninitialized buffer.
+ * The uninitialized buffer just copies the raw handle.
+ */
+ inline void memory_shallow_copy(mem_handle const & src_buffer,
+ mem_handle & dst_buffer)
+ {
+ assert( (dst_buffer.get_active_handle_id() == MEMORY_NOT_INITIALIZED) && bool("Shallow copy on already initialized memory not supported!"));
+
+ switch (src_buffer.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id());
+ dst_buffer.ram_handle() = src_buffer.ram_handle();
+ dst_buffer.raw_size(src_buffer.raw_size());
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id());
+ dst_buffer.opencl_handle() = src_buffer.opencl_handle();
+ dst_buffer.raw_size(src_buffer.raw_size());
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id());
+ dst_buffer.cuda_handle() = src_buffer.cuda_handle();
+ dst_buffer.raw_size(src_buffer.raw_size());
+ break;
+#endif
+ case MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("unknown memory handle!");
+ }
+ }
+
+ /** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'
+ *
+ * This is the generic version for CPU RAM, CUDA, and OpenCL. Writes the memory in the currently active memory domain.
+ *
+ * @param dst_buffer A smart pointer to the beginning of an allocated buffer
+ * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
+ * @param bytes_to_write Number of bytes to be written
+ * @param ptr Pointer to the first byte to be written
+ * @param async Whether the operation should be asynchronous
+ */
+ inline void memory_write(mem_handle & dst_buffer,
+ vcl_size_t dst_offset,
+ vcl_size_t bytes_to_write,
+ const void * ptr,
+ bool async = false)
+ {
+ if (bytes_to_write > 0)
+ {
+ switch (dst_buffer.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ cpu_ram::memory_write(dst_buffer.ram_handle(), dst_offset, bytes_to_write, ptr, async);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ opencl::memory_write(dst_buffer.opencl_handle(), dst_offset, bytes_to_write, ptr, async);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ cuda::memory_write(dst_buffer.cuda_handle(), dst_offset, bytes_to_write, ptr, async);
+ break;
+#endif
+ case MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("unknown memory handle!");
+ }
+ }
+ }
+
+ /** @brief Reads data from a buffer back to main RAM.
+ *
+ * This is the generic version for CPU RAM, CUDA, and OpenCL. Reads the memory from the currently active memory domain.
+ *
+ * @param src_buffer A smart pointer to the beginning of an allocated source buffer
+ * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_
+ * @param bytes_to_read Number of bytes to be read
+ * @param ptr Location in main RAM where to read data should be written to
+ * @param async Whether the operation should be asynchronous
+ */
+ inline void memory_read(mem_handle const & src_buffer,
+ vcl_size_t src_offset,
+ vcl_size_t bytes_to_read,
+ void * ptr,
+ bool async = false)
+ {
+ //finish(); //Fixes some issues with AMD APP SDK. However, might sacrifice a few percents of performance in some cases.
+
+ if (bytes_to_read > 0)
+ {
+ switch (src_buffer.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ cpu_ram::memory_read(src_buffer.ram_handle(), src_offset, bytes_to_read, ptr, async);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ opencl::memory_read(src_buffer.opencl_handle(), src_offset, bytes_to_read, ptr, async);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ cuda::memory_read(src_buffer.cuda_handle(), src_offset, bytes_to_read, ptr, async);
+ break;
+#endif
+ case MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("unknown memory handle!");
+ }
+ }
+ }
+
+
+
+ namespace detail
+ {
+ template<typename T>
+ vcl_size_t element_size(memory_types /* mem_type */)
+ {
+ return sizeof(T);
+ }
+
+
+ template<>
+ inline vcl_size_t element_size<unsigned long>(memory_types
+ #ifdef VIENNACL_WITH_OPENCL
+ mem_type //in order to compile cleanly at -Wextra in GCC
+ #endif
+ )
+ {
+#ifdef VIENNACL_WITH_OPENCL
+ if (mem_type == OPENCL_MEMORY)
+ return sizeof(cl_ulong);
+#endif
+ return sizeof(unsigned long);
+ }
+
+ template<>
+ inline vcl_size_t element_size<long>(memory_types
+ #ifdef VIENNACL_WITH_OPENCL
+ mem_type //in order to compile cleanly at -Wextra in GCC
+ #endif
+ )
+ {
+#ifdef VIENNACL_WITH_OPENCL
+ if (mem_type == OPENCL_MEMORY)
+ return sizeof(cl_long);
+#endif
+ return sizeof(long);
+ }
+
+
+ template<>
+ inline vcl_size_t element_size<unsigned int>(memory_types
+ #ifdef VIENNACL_WITH_OPENCL
+ mem_type //in order to compile cleanly at -Wextra in GCC
+ #endif
+ )
+ {
+#ifdef VIENNACL_WITH_OPENCL
+ if (mem_type == OPENCL_MEMORY)
+ return sizeof(cl_uint);
+#endif
+ return sizeof(unsigned int);
+ }
+
+ template<>
+ inline vcl_size_t element_size<int>(memory_types
+ #ifdef VIENNACL_WITH_OPENCL
+ mem_type //in order to compile cleanly at -Wextra in GCC
+ #endif
+ )
+ {
+#ifdef VIENNACL_WITH_OPENCL
+ if (mem_type == OPENCL_MEMORY)
+ return sizeof(cl_int);
+#endif
+ return sizeof(int);
+ }
+
+
+ }
+
+
+ /** @brief Switches the active memory domain within a memory handle. Data is copied if the new active domain differs from the old one. Memory in the source handle is not free'd. */
+ template<typename DataType>
+ void switch_memory_context(mem_handle & handle, viennacl::context new_ctx)
+ {
+ if (handle.get_active_handle_id() == new_ctx.memory_type())
+ return;
+
+ if (handle.get_active_handle_id() == viennacl::MEMORY_NOT_INITIALIZED || handle.raw_size() == 0)
+ {
+ handle.switch_active_handle_id(new_ctx.memory_type());
+#ifdef VIENNACL_WITH_OPENCL
+ if (new_ctx.memory_type() == OPENCL_MEMORY)
+ handle.opencl_handle().context(new_ctx.opencl_context());
+#endif
+ return;
+ }
+
+ vcl_size_t size_dst = detail::element_size<DataType>(handle.get_active_handle_id());
+ vcl_size_t size_src = detail::element_size<DataType>(new_ctx.memory_type());
+
+ if (size_dst != size_src) // OpenCL data element size not the same as host data element size
+ {
+ throw memory_exception("Heterogeneous data element sizes not yet supported!");
+ }
+ else //no data conversion required
+ {
+ if (handle.get_active_handle_id() == MAIN_MEMORY) //we can access the existing data directly
+ {
+ switch (new_ctx.memory_type())
+ {
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ handle.opencl_handle().context(new_ctx.opencl_context());
+ handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), handle.raw_size(), handle.ram_handle().get());
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ handle.cuda_handle() = cuda::memory_create(handle.raw_size(), handle.ram_handle().get());
+ break;
+#endif
+ case MAIN_MEMORY:
+ default:
+ throw memory_exception("Invalid destination domain");
+ }
+ }
+#ifdef VIENNACL_WITH_OPENCL
+ else if (handle.get_active_handle_id() == OPENCL_MEMORY) // data can be dumped into destination directly
+ {
+ std::vector<DataType> buffer;
+
+ switch (new_ctx.memory_type())
+ {
+ case MAIN_MEMORY:
+ handle.ram_handle() = cpu_ram::memory_create(handle.raw_size());
+ opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), handle.ram_handle().get());
+ break;
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ buffer.resize(handle.raw_size() / sizeof(DataType));
+ opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), &(buffer[0]));
+ cuda::memory_create(handle.cuda_handle(), handle.raw_size(), &(buffer[0]));
+ break;
+#endif
+ default:
+ throw memory_exception("Invalid destination domain");
+ }
+ }
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ else //CUDA_MEMORY
+ {
+ std::vector<DataType> buffer;
+
+ // write
+ switch (new_ctx.memory_type())
+ {
+ case MAIN_MEMORY:
+ handle.ram_handle() = cpu_ram::memory_create(handle.raw_size());
+ cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), handle.ram_handle().get());
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ buffer.resize(handle.raw_size() / sizeof(DataType));
+ cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), &(buffer[0]));
+ handle.opencl_handle() = opencl::memory_create(handle.raw_size(), &(buffer[0]));
+ break;
+#endif
+ default:
+ throw memory_exception("Unsupported source memory domain");
+ }
+ }
+#endif
+
+ // everything succeeded so far, now switch to new domain:
+ handle.switch_active_handle_id(new_ctx.memory_type());
+
+ } // no data conversion
+ }
+
+
+
+ /** @brief Copies data of the provided 'DataType' from 'handle_src' to 'handle_dst' and converts the data if the binary representation of 'DataType' among the memory domains differs. */
+ template<typename DataType>
+ void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst)
+ {
+ if (handle_dst.get_active_handle_id() == MEMORY_NOT_INITIALIZED)
+ handle_dst.switch_active_handle_id(default_memory_type());
+
+ vcl_size_t element_size_src = detail::element_size<DataType>(handle_src.get_active_handle_id());
+ vcl_size_t element_size_dst = detail::element_size<DataType>(handle_dst.get_active_handle_id());
+
+ if (element_size_src != element_size_dst)
+ {
+ // Data needs to be converted.
+
+ typesafe_host_array<DataType> buffer_src(handle_src);
+ typesafe_host_array<DataType> buffer_dst(handle_dst, handle_src.raw_size() / element_size_src);
+
+ //
+ // Step 1: Fill buffer_dst depending on where the data resides:
+ //
+ DataType const * src_data;
+ switch (handle_src.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ src_data = reinterpret_cast<DataType const *>(handle_src.ram_handle().get());
+ for (vcl_size_t i=0; i<buffer_dst.size(); ++i)
+ buffer_dst.set(i, src_data[i]);
+ break;
+
+#ifdef VIENNACL_WITH_OPENCL
+ case OPENCL_MEMORY:
+ buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src);
+ opencl::memory_read(handle_src.opencl_handle(), 0, buffer_src.raw_size(), buffer_src.get());
+ for (vcl_size_t i=0; i<buffer_dst.size(); ++i)
+ buffer_dst.set(i, buffer_src[i]);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case CUDA_MEMORY:
+ buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src);
+ cuda::memory_read(handle_src.cuda_handle(), 0, buffer_src.raw_size(), buffer_src.get());
+ for (vcl_size_t i=0; i<buffer_dst.size(); ++i)
+ buffer_dst.set(i, buffer_src[i]);
+ break;
+#endif
+
+ default:
+ throw memory_exception("unsupported memory domain");
+ }
+
+ //
+ // Step 2: Write to destination
+ //
+ if (handle_dst.raw_size() == buffer_dst.raw_size())
+ viennacl::backend::memory_write(handle_dst, 0, buffer_dst.raw_size(), buffer_dst.get());
+ else
+ viennacl::backend::memory_create(handle_dst, buffer_dst.raw_size(), viennacl::traits::context(handle_dst), buffer_dst.get());
+
+ }
+ else
+ {
+ // No data conversion required.
+ typesafe_host_array<DataType> buffer(handle_src);
+
+ switch (handle_src.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ switch (handle_dst.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ case OPENCL_MEMORY:
+ case CUDA_MEMORY:
+ if (handle_dst.raw_size() == handle_src.raw_size())
+ viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), handle_src.ram_handle().get());
+ else
+ viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst), handle_src.ram_handle().get());
+ break;
+
+ default:
+ throw memory_exception("unsupported destination memory domain");
+ }
+ break;
+
+ case OPENCL_MEMORY:
+ switch (handle_dst.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ if (handle_dst.raw_size() != handle_src.raw_size())
+ viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
+ viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get());
+ break;
+
+ case OPENCL_MEMORY:
+ if (handle_dst.raw_size() != handle_src.raw_size())
+ viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
+ viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size());
+ break;
+
+ case CUDA_MEMORY:
+ if (handle_dst.raw_size() != handle_src.raw_size())
+ viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
+ buffer.resize(handle_src, handle_src.raw_size() / element_size_src);
+ viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get());
+ viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get());
+ break;
+
+ default:
+ throw memory_exception("unsupported destination memory domain");
+ }
+ break;
+
+ case CUDA_MEMORY:
+ switch (handle_dst.get_active_handle_id())
+ {
+ case MAIN_MEMORY:
+ if (handle_dst.raw_size() != handle_src.raw_size())
+ viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
+ viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get());
+ break;
+
+ case OPENCL_MEMORY:
+ if (handle_dst.raw_size() != handle_src.raw_size())
+ viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
+ buffer.resize(handle_src, handle_src.raw_size() / element_size_src);
+ viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get());
+ viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get());
+ break;
+
+ case CUDA_MEMORY:
+ if (handle_dst.raw_size() != handle_src.raw_size())
+ viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
+ viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size());
+ break;
+
+ default:
+ throw memory_exception("unsupported destination memory domain");
+ }
+ break;
+
+ default:
+ throw memory_exception("unsupported source memory domain");
+ }
+
+ }
+ }
+
+
+} //backend
+
+//
+// Convenience layer:
+//
+/** @brief Generic convenience routine for migrating data of an object to a new memory domain */
+template<typename T>
+void switch_memory_context(T & obj, viennacl::context new_ctx)
+{
+ obj.switch_memory_context(new_ctx);
+}
+
+} //viennacl
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp
new file mode 100644
index 0000000..a8be55a
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp
@@ -0,0 +1,151 @@
+#ifndef VIENNACL_BACKEND_OPENCL_HPP_
+#define VIENNACL_BACKEND_OPENCL_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/backend/opencl.hpp
+ @brief Implementations for the OpenCL backend functionality
+*/
+
+
+#include <vector>
+#include "viennacl/ocl/handle.hpp"
+#include "viennacl/ocl/backend.hpp"
+
+namespace viennacl
+{
+namespace backend
+{
+namespace opencl
+{
+
+// Requirements for backend:
+
+// * memory_create(size, host_ptr)
+// * memory_copy(src, dest, offset_src, offset_dest, size)
+// * memory_write_from_main_memory(src, offset, size,
+// dest, offset, size)
+// * memory_read_to_main_memory(src, offset, size
+// dest, offset, size)
+// *
+//
+
+/** @brief Creates an array of the specified size in the current OpenCL context. If the second argument is provided, the buffer is initialized with data from that pointer.
+ *
+ * @param size_in_bytes Number of bytes to allocate
+ * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
+ * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host)
+ *
+ */
+inline cl_mem memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL)
+{
+ //std::cout << "Creating buffer (" << size_in_bytes << " bytes) host buffer " << host_ptr << " in context " << &ctx << std::endl;
+ return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast<unsigned int>(size_in_bytes), const_cast<void *>(host_ptr));
+}
+
+/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' in the OpenCL context to memory starting at address 'dst_buffer + dst_offset' in the same OpenCL context.
+ *
+ * @param src_buffer A smart pointer to the begin of an allocated OpenCL buffer
+ * @param dst_buffer A smart pointer to the end of an allocated OpenCL buffer
+ * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
+ * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
+ * @param bytes_to_copy Number of bytes to be copied
+ */
+inline void memory_copy(viennacl::ocl::handle<cl_mem> const & src_buffer,
+ viennacl::ocl::handle<cl_mem> & dst_buffer,
+ vcl_size_t src_offset,
+ vcl_size_t dst_offset,
+ vcl_size_t bytes_to_copy)
+{
+ assert( &src_buffer.context() == &dst_buffer.context() && bool("Transfer between memory buffers in different contexts not supported yet!"));
+
+ viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(src_buffer.context());
+ cl_int err = clEnqueueCopyBuffer(memory_context.get_queue().handle().get(),
+ src_buffer.get(),
+ dst_buffer.get(),
+ src_offset,
+ dst_offset,
+ bytes_to_copy,
+ 0, NULL, NULL); //events
+ VIENNACL_ERR_CHECK(err);
+}
+
+
+/** @brief Writes data from main RAM identified by 'ptr' to the OpenCL buffer identified by 'dst_buffer'
+ *
+ * @param dst_buffer A smart pointer to the beginning of an allocated OpenCL buffer
+ * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
+ * @param bytes_to_copy Number of bytes to be copied
+ * @param ptr Pointer to the first byte to be written
+ * @param async Whether the operation should be asynchronous
+ */
+inline void memory_write(viennacl::ocl::handle<cl_mem> & dst_buffer,
+ vcl_size_t dst_offset,
+ vcl_size_t bytes_to_copy,
+ const void * ptr,
+ bool async = false)
+{
+
+ viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(dst_buffer.context());
+
+#if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE)
+ std::cout << "Writing data (" << bytes_to_copy << " bytes, offset " << dst_offset << ") to OpenCL buffer " << dst_buffer.get() << " with queue " << memory_context.get_queue().handle().get() << " from " << ptr << std::endl;
+#endif
+
+ cl_int err = clEnqueueWriteBuffer(memory_context.get_queue().handle().get(),
+ dst_buffer.get(),
+ async ? CL_FALSE : CL_TRUE, //blocking
+ dst_offset,
+ bytes_to_copy,
+ ptr,
+ 0, NULL, NULL); //events
+ VIENNACL_ERR_CHECK(err);
+}
+
+
+/** @brief Reads data from an OpenCL buffer back to main RAM.
+ *
+ * @param src_buffer A smart pointer to the beginning of an allocated OpenCL source buffer
+ * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_
+ * @param bytes_to_copy Number of bytes to be read
+ * @param ptr Location in main RAM where to read data should be written to
+ * @param async Whether the operation should be asynchronous
+ */
+inline void memory_read(viennacl::ocl::handle<cl_mem> const & src_buffer,
+ vcl_size_t src_offset,
+ vcl_size_t bytes_to_copy,
+ void * ptr,
+ bool async = false)
+{
+ //std::cout << "Reading data (" << bytes_to_copy << " bytes, offset " << src_offset << ") from OpenCL buffer " << src_buffer.get() << " to " << ptr << std::endl;
+ viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(src_buffer.context());
+ cl_int err = clEnqueueReadBuffer(memory_context.get_queue().handle().get(),
+ src_buffer.get(),
+ async ? CL_FALSE : CL_TRUE, //blocking
+ src_offset,
+ bytes_to_copy,
+ ptr,
+ 0, NULL, NULL); //events
+ VIENNACL_ERR_CHECK(err);
+}
+
+
+}
+} //backend
+} //viennacl
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp
new file mode 100644
index 0000000..9aaeb2e
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp
@@ -0,0 +1,268 @@
+#ifndef VIENNACL_BACKEND_UTIL_HPP
+#define VIENNACL_BACKEND_UTIL_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/backend/util.hpp
+ @brief Helper functionality for working with different memory domains
+*/
+
+#include <vector>
+#include <cassert>
+
+#include "viennacl/forwards.h"
+#include "viennacl/backend/mem_handle.hpp"
+
+#ifdef VIENNACL_WITH_OPENCL
+#include "viennacl/backend/opencl.hpp"
+#endif
+
+
+namespace viennacl
+{
+namespace backend
+{
+namespace detail
+{
+
+ /** @brief Helper struct for converting a type to its OpenCL pendant. */
+ template<typename T>
+ struct convert_to_opencl
+ {
+ typedef T type;
+ enum { special = 0 };
+ };
+
+#ifdef VIENNACL_WITH_OPENCL
+ template<>
+ struct convert_to_opencl<unsigned int>
+ {
+ typedef cl_uint type;
+ //enum { special = (sizeof(unsigned int) != sizeof(cl_uint)) };
+ enum { special = 1 };
+ };
+
+ template<>
+ struct convert_to_opencl<int>
+ {
+ typedef cl_int type;
+ //enum { special = (sizeof(int) != sizeof(cl_int)) };
+ enum { special = 1 };
+ };
+
+
+ template<>
+ struct convert_to_opencl<unsigned long>
+ {
+ typedef cl_ulong type;
+ //enum { special = (sizeof(unsigned long) != sizeof(cl_ulong)) };
+ enum { special = 1 };
+ };
+
+ template<>
+ struct convert_to_opencl<long>
+ {
+ typedef cl_long type;
+ //enum { special = (sizeof(long) != sizeof(cl_long)) };
+ enum { special = 1 };
+ };
+#endif
+
+
+} //namespace detail
+
+
+/** @brief Helper class implementing an array on the host. Default case: No conversion necessary */
+template<typename T, bool special = detail::convert_to_opencl<T>::special>
+class typesafe_host_array
+{
+ typedef T cpu_type;
+ typedef typename detail::convert_to_opencl<T>::type target_type;
+
+public:
+ explicit typesafe_host_array() : bytes_buffer_(NULL), buffer_size_(0) {}
+
+ explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num)
+ {
+ resize(handle, num);
+ }
+
+ ~typesafe_host_array() { delete[] bytes_buffer_; }
+
+ //
+ // Setter and Getter
+ //
+ void * get() { return reinterpret_cast<void *>(bytes_buffer_); }
+ vcl_size_t raw_size() const { return buffer_size_; }
+ vcl_size_t element_size() const { return sizeof(cpu_type); }
+ vcl_size_t size() const { return buffer_size_ / element_size(); }
+ template<typename U>
+ void set(vcl_size_t index, U value)
+ {
+ reinterpret_cast<cpu_type *>(bytes_buffer_)[index] = static_cast<cpu_type>(value);
+ }
+
+ //
+ // Resize functionality
+ //
+
+ /** @brief Resize without initializing the new memory */
+ void raw_resize(mem_handle const & /*handle*/, vcl_size_t num)
+ {
+ buffer_size_ = sizeof(cpu_type) * num;
+
+ if (num > 0)
+ {
+ delete[] bytes_buffer_;
+
+ bytes_buffer_ = new char[buffer_size_];
+ }
+ }
+
+ /** @brief Resize including initialization of new memory (cf. std::vector<>) */
+ void resize(mem_handle const & handle, vcl_size_t num)
+ {
+ raw_resize(handle, num);
+
+ if (num > 0)
+ {
+ for (vcl_size_t i=0; i<buffer_size_; ++i)
+ bytes_buffer_[i] = 0;
+ }
+ }
+
+ cpu_type operator[](vcl_size_t index) const
+ {
+ assert(index < size() && bool("index out of bounds"));
+
+ return reinterpret_cast<cpu_type *>(bytes_buffer_)[index];
+ }
+
+private:
+ char * bytes_buffer_;
+ vcl_size_t buffer_size_;
+};
+
+
+
+
+/** @brief Special host array type for conversion between OpenCL types and pure CPU types */
+template<typename T>
+class typesafe_host_array<T, true>
+{
+ typedef T cpu_type;
+ typedef typename detail::convert_to_opencl<T>::type target_type;
+
+public:
+ explicit typesafe_host_array() : convert_to_opencl_( (default_memory_type() == OPENCL_MEMORY) ? true : false), bytes_buffer_(NULL), buffer_size_(0) {}
+
+ explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : convert_to_opencl_(false), bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num)
+ {
+ resize(handle, num);
+ }
+
+ ~typesafe_host_array() { delete[] bytes_buffer_; }
+
+ //
+ // Setter and Getter
+ //
+
+ template<typename U>
+ void set(vcl_size_t index, U value)
+ {
+#ifdef VIENNACL_WITH_OPENCL
+ if (convert_to_opencl_)
+ reinterpret_cast<target_type *>(bytes_buffer_)[index] = static_cast<target_type>(value);
+ else
+#endif
+ reinterpret_cast<cpu_type *>(bytes_buffer_)[index] = static_cast<cpu_type>(value);
+ }
+
+ void * get() { return reinterpret_cast<void *>(bytes_buffer_); }
+ cpu_type operator[](vcl_size_t index) const
+ {
+ assert(index < size() && bool("index out of bounds"));
+#ifdef VIENNACL_WITH_OPENCL
+ if (convert_to_opencl_)
+ return static_cast<cpu_type>(reinterpret_cast<target_type *>(bytes_buffer_)[index]);
+#endif
+ return reinterpret_cast<cpu_type *>(bytes_buffer_)[index];
+ }
+
+ vcl_size_t raw_size() const { return buffer_size_; }
+ vcl_size_t element_size() const
+ {
+#ifdef VIENNACL_WITH_OPENCL
+ if (convert_to_opencl_)
+ return sizeof(target_type);
+#endif
+ return sizeof(cpu_type);
+ }
+ vcl_size_t size() const { return buffer_size_ / element_size(); }
+
+ //
+ // Resize functionality
+ //
+
+ /** @brief Resize without initializing the new memory */
+ void raw_resize(mem_handle const & handle, vcl_size_t num)
+ {
+ buffer_size_ = sizeof(cpu_type) * num;
+ (void)handle; //silence unused variable warning if compiled without OpenCL support
+
+#ifdef VIENNACL_WITH_OPENCL
+ memory_types mem_type = handle.get_active_handle_id();
+ if (mem_type == MEMORY_NOT_INITIALIZED)
+ mem_type = default_memory_type();
+
+ if (mem_type == OPENCL_MEMORY)
+ {
+ convert_to_opencl_ = true;
+ buffer_size_ = sizeof(target_type) * num;
+ }
+#endif
+
+ if (num > 0)
+ {
+ delete[] bytes_buffer_;
+
+ bytes_buffer_ = new char[buffer_size_];
+ }
+ }
+
+ /** @brief Resize including initialization of new memory (cf. std::vector<>) */
+ void resize(mem_handle const & handle, vcl_size_t num)
+ {
+ raw_resize(handle, num);
+
+ if (num > 0)
+ {
+ for (vcl_size_t i=0; i<buffer_size_; ++i)
+ bytes_buffer_[i] = 0;
+ }
+ }
+
+private:
+ bool convert_to_opencl_;
+ char * bytes_buffer_;
+ vcl_size_t buffer_size_;
+};
+
+} //backend
+} //viennacl
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp
new file mode 100644
index 0000000..1ee13d5
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp
@@ -0,0 +1,359 @@
+#ifndef VIENNACL_CIRCULANT_MATRIX_HPP
+#define VIENNACL_CIRCULANT_MATRIX_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file circulant_matrix.hpp
+ @brief Implementation of the circulant_matrix class for efficient manipulation of circulant matrices. Experimental.
+*/
+
+#include "viennacl/forwards.h"
+#include "viennacl/vector.hpp"
+#include "viennacl/ocl/backend.hpp"
+
+#include "viennacl/linalg/circulant_matrix_operations.hpp"
+
+#include "viennacl/fft.hpp"
+
+namespace viennacl
+{
+/** @brief A Circulant matrix class
+ *
+ * @tparam NumericT The underlying scalar type (either float or double)
+ * @tparam AlignmentV The internal memory size is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory.
+ */
+template<class NumericT, unsigned int AlignmentV>
+class circulant_matrix
+{
+public:
+ typedef viennacl::backend::mem_handle handle_type;
+ typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type;
+
+ /**
+ * @brief The default constructor. Does not allocate any memory.
+ *
+ */
+ explicit circulant_matrix() {}
+
+ /**
+ * @brief Creates the matrix with the given size
+ *
+ * @param rows Number of rows of the matrix
+ * @param cols Number of columns of the matrix
+ */
+ explicit circulant_matrix(vcl_size_t rows, vcl_size_t cols) : elements_(rows)
+ {
+ assert(rows == cols && bool("Circulant matrix must be square!"));
+ (void)cols; // avoid 'unused parameter' warning in optimized builds
+ }
+
+ /** @brief Resizes the matrix.
+ * Existing entries can be preserved
+ *
+ * @param sz New size of matrix
+ * @param preserve If true, existing values are preserved.
+ */
+ void resize(vcl_size_t sz, bool preserve = true)
+ {
+ elements_.resize(sz, preserve);
+ }
+
+ /** @brief Returns the OpenCL handle
+ *
+ * @return OpenCL handle
+ */
+ handle_type const & handle() const { return elements_.handle(); }
+
+ /**
+ * @brief Returns an internal viennacl::vector, which represents a circulant matrix elements
+ *
+ */
+ viennacl::vector<NumericT, AlignmentV> & elements() { return elements_; }
+ viennacl::vector<NumericT, AlignmentV> const & elements() const { return elements_; }
+
+ /**
+ * @brief Returns the number of rows of the matrix
+ */
+ vcl_size_t size1() const { return elements_.size(); }
+
+ /**
+ * @brief Returns the number of columns of the matrix
+ */
+ vcl_size_t size2() const { return elements_.size(); }
+
+ /** @brief Returns the internal size of matrix representtion.
+ * Usually required for launching OpenCL kernels only
+ *
+ * @return Internal size of matrix representation
+ */
+ vcl_size_t internal_size() const { return elements_.internal_size(); }
+
+ /**
+ * @brief Read-write access to a single element of the matrix
+ *
+ * @param row_index Row index of accessed element
+ * @param col_index Column index of accessed element
+ * @return Proxy for matrix entry
+ */
+ entry_proxy<NumericT> operator()(vcl_size_t row_index, vcl_size_t col_index)
+ {
+ long index = static_cast<long>(row_index) - static_cast<long>(col_index);
+
+ assert(row_index < size1() && col_index < size2() && bool("Invalid access"));
+
+ while (index < 0)
+ index += static_cast<long>(size1());
+ return elements_[static_cast<vcl_size_t>(index)];
+ }
+
+ /**
+ * @brief += operation for circulant matrices
+ *
+ * @param that Matrix which will be added
+ * @return Result of addition
+ */
+ circulant_matrix<NumericT, AlignmentV>& operator +=(circulant_matrix<NumericT, AlignmentV>& that)
+ {
+ elements_ += that.elements();
+ return *this;
+ }
+
+private:
+ circulant_matrix(circulant_matrix const &) {}
+ circulant_matrix & operator=(circulant_matrix const & t);
+
+ viennacl::vector<NumericT, AlignmentV> elements_;
+};
+
+/** @brief Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU)
+ *
+ *
+ * @param cpu_vec A std::vector on the host.
+ * @param gpu_mat A circulant_matrix from ViennaCL
+ */
+template<typename NumericT, unsigned int AlignmentV>
+void copy(std::vector<NumericT>& cpu_vec, circulant_matrix<NumericT, AlignmentV>& gpu_mat)
+{
+ assert( (gpu_mat.size1() == 0 || cpu_vec.size() == gpu_mat.size1()) && bool("Size mismatch"));
+ copy(cpu_vec, gpu_mat.elements());
+}
+
+/** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the std::vector
+ *
+ *
+ * @param gpu_mat A circulant_matrix from ViennaCL
+ * @param cpu_vec A std::vector on the host.
+ */
+template<typename NumericT, unsigned int AlignmentV>
+void copy(circulant_matrix<NumericT, AlignmentV>& gpu_mat, std::vector<NumericT>& cpu_vec)
+{
+ assert(cpu_vec.size() == gpu_mat.size1() && bool("Size mismatch"));
+ copy(gpu_mat.elements(), cpu_vec);
+}
+
+/** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the matrix-like object
+ *
+ *
+ * @param circ_src A circulant_matrix from ViennaCL
+ * @param com_dst A matrix-like object
+ */
+template<typename NumericT, unsigned int AlignmentV, typename MatrixT>
+void copy(circulant_matrix<NumericT, AlignmentV>& circ_src, MatrixT& com_dst)
+{
+ vcl_size_t size = circ_src.size1();
+ assert(size == viennacl::traits::size1(com_dst) && bool("Size mismatch"));
+ assert(size == viennacl::traits::size2(com_dst) && bool("Size mismatch"));
+ std::vector<NumericT> tmp(size);
+ copy(circ_src, tmp);
+
+ for (vcl_size_t i = 0; i < size; i++)
+ {
+ for (vcl_size_t j = 0; j < size; j++)
+ {
+ long index = static_cast<long>(i) - static_cast<long>(j);
+ if (index < 0)
+ index += static_cast<long>(size);
+ com_dst(i, j) = tmp[static_cast<vcl_size_t>(index)];
+ }
+ }
+}
+
+/** @brief Copies a the matrix-like object to the circulant matrix from the OpenCL device (either GPU or multi-core CPU)
+ *
+ *
+ * @param com_src A std::vector on the host
+ * @param circ_dst A circulant_matrix from ViennaCL
+ */
+template<typename NumericT, unsigned int AlignmentV, typename MatrixT>
+void copy(MatrixT& com_src, circulant_matrix<NumericT, AlignmentV>& circ_dst)
+{
+ assert( (circ_dst.size1() == 0 || circ_dst.size1() == viennacl::traits::size1(com_src)) && bool("Size mismatch"));
+ assert( (circ_dst.size2() == 0 || circ_dst.size2() == viennacl::traits::size2(com_src)) && bool("Size mismatch"));
+
+ vcl_size_t size = viennacl::traits::size1(com_src);
+
+ std::vector<NumericT> tmp(size);
+
+ for (vcl_size_t i = 0; i < size; i++) tmp[i] = com_src(i, 0);
+
+ copy(tmp, circ_dst);
+}
+
+/*namespace linalg
+ {
+ template<typename NumericT, unsigned int AlignmentV, unsigned int VECTOR_AlignmentV>
+ void prod_impl(circulant_matrix<NumericT, AlignmentV> const & mat,
+ vector<NumericT, VECTOR_AlignmentV> const & vec,
+ vector<NumericT, VECTOR_AlignmentV>& result) {
+ viennacl::vector<NumericT, VECTOR_AlignmentV> circ(mat.elements().size() * 2);
+ fft::real_to_complex(mat.elements(), circ, mat.elements().size());
+
+ viennacl::vector<NumericT, VECTOR_AlignmentV> tmp(vec.size() * 2);
+ viennacl::vector<NumericT, VECTOR_AlignmentV> tmp2(vec.size() * 2);
+
+ fft::real_to_complex(vec, tmp, vec.size());
+ fft::convolve(circ, tmp, tmp2);
+ fft::complex_to_real(tmp2, result, vec.size());
+ }
+ }*/
+
+/** @brief Prints the matrix. Output is compatible to boost::numeric::ublas
+ *
+ * @param s STL output stream
+ * @param gpu_matrix A ViennaCL circulant matrix
+ */
+template<class NumericT, unsigned int AlignmentV>
+std::ostream & operator<<(std::ostream& s, circulant_matrix<NumericT, AlignmentV>& gpu_matrix)
+{
+ vcl_size_t size = gpu_matrix.size1();
+ std::vector<NumericT> tmp(size);
+ copy(gpu_matrix, tmp);
+ s << "[" << size << "," << size << "](";
+
+ for (vcl_size_t i = 0; i < size; i++)
+ {
+ s << "(";
+ for (vcl_size_t j = 0; j < size; j++)
+ {
+ long index = static_cast<long>(i) - static_cast<long>(j);
+ if (index < 0) index = static_cast<long>(size) + index;
+ s << tmp[vcl_size_t(index)];
+ //s << index;
+ if (j < (size - 1)) s << ",";
+ }
+ s << ")";
+ }
+ s << ")";
+ return s;
+}
+
+//
+// Specify available operations:
+//
+
+/** \cond */
+
+namespace linalg
+{
+namespace detail
+{
+ // x = A * y
+ template<typename T, unsigned int A>
+ struct op_executor<vector_base<T>, op_assign, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> >
+ {
+ static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
+ {
+ // check for the special case x = A * x
+ if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
+ {
+ viennacl::vector<T> temp(lhs);
+ viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
+ lhs = temp;
+ }
+ else
+ viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
+ }
+ };
+
+ template<typename T, unsigned int A>
+ struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> >
+ {
+ static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
+ {
+ viennacl::vector<T> temp(lhs);
+ viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
+ lhs += temp;
+ }
+ };
+
+ template<typename T, unsigned int A>
+ struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> >
+ {
+ static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
+ {
+ viennacl::vector<T> temp(lhs);
+ viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
+ lhs -= temp;
+ }
+ };
+
+
+ // x = A * vec_op
+ template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
+ struct op_executor<vector_base<T>, op_assign, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
+ {
+ static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
+ {
+ viennacl::vector<T> temp(rhs.rhs());
+ viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
+ }
+ };
+
+ // x = A * vec_op
+ template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
+ struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const circulant_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> >
+ {
+ static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
+ {
+ viennacl::vector<T> temp(rhs.rhs());
+ viennacl::vector<T> temp_result(lhs);
+ viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
+ lhs += temp_result;
+ }
+ };
+
+ // x = A * vec_op
+ template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
+ struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
+ {
+ static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
+ {
+ viennacl::vector<T> temp(rhs.rhs());
+ viennacl::vector<T> temp_result(lhs);
+ viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
+ lhs -= temp_result;
+ }
+ };
+
+} // namespace detail
+} // namespace linalg
+
+/** \endcond */
+}
+
+#endif // VIENNACL_CIRCULANT_MATRIX_HPP