You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2016/06/10 16:52:19 UTC
[14/51] [partial] mahout git commit: Revert "(nojira) add
native-viennaCL module to codebase. closes apache/mahout#241"
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/norm_inf.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_inf.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_inf.hpp
deleted file mode 100644
index 959bbd8..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_inf.hpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#ifndef VIENNACL_LINALG_NORM_INF_HPP_
-#define VIENNACL_LINALG_NORM_INF_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file norm_inf.hpp
- @brief Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations.
-*/
-
-#include <cmath>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/meta/enable_if.hpp"
-#include "viennacl/meta/tag_of.hpp"
-
-namespace viennacl
-{
- //
- // generic norm_inf function
- // uses tag dispatch to identify which algorithm
- // should be called
- //
- namespace linalg
- {
-
- #ifdef VIENNACL_WITH_UBLAS
- // ----------------------------------------------------
- // UBLAS
- //
- template< typename VectorT >
- typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value,
- typename VectorT::value_type
- >::type
- norm_inf(VectorT const& v1)
- {
- return boost::numeric::ublas::norm_inf(v1);
- }
- #endif
-
-
- // ----------------------------------------------------
- // STL
- //
- template< typename T, typename A >
- T norm_inf(std::vector<T, A> const & v1)
- {
- //std::cout << "stl .. " << std::endl;
- T result = 0;
- for (typename std::vector<T, A>::size_type i=0; i<v1.size(); ++i)
- {
- if (std::fabs(v1[i]) > result)
- result = std::fabs(v1[i]);
- }
-
- return result;
- }
-
- // ----------------------------------------------------
- // VIENNACL
- //
- template< typename ScalarType>
- viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
- const viennacl::vector_base<ScalarType>,
- viennacl::op_norm_inf >
- norm_inf(viennacl::vector_base<ScalarType> const & v1)
- {
- //std::cout << "viennacl .. " << std::endl;
- return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
- const viennacl::vector_base<ScalarType>,
- viennacl::op_norm_inf >(v1, v1);
- }
-
- // with vector expression:
- template<typename LHS, typename RHS, typename OP>
- viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>,
- const viennacl::vector_expression<const LHS, const RHS, OP>,
- viennacl::op_norm_inf>
- norm_inf(viennacl::vector_expression<const LHS, const RHS, OP> const & vector)
- {
- return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>,
- const viennacl::vector_expression<const LHS, const RHS, OP>,
- viennacl::op_norm_inf >(vector, vector);
- }
-
-
- } // end namespace linalg
-} // end namespace viennacl
-#endif
-
-
-
-
-
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/amg_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/amg_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/amg_operations.hpp
deleted file mode 100644
index 7cdcf89..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/amg_operations.hpp
+++ /dev/null
@@ -1,458 +0,0 @@
-#ifndef VIENNACL_LINALG_OPENCL_AMG_OPERATIONS_HPP
-#define VIENNACL_LINALG_OPENCL_AMG_OPERATIONS_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file opencl/amg_operations.hpp
- @brief Implementations of routines for AMG in OpenCL.
-*/
-
-#include <cstdlib>
-#include <cmath>
-#include <map>
-
-#include "viennacl/linalg/detail/amg/amg_base.hpp"
-#include "viennacl/linalg/opencl/common.hpp"
-#include "viennacl/linalg/opencl/kernels/amg.hpp"
-
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace opencl
-{
-namespace amg
-{
-
-
-///////////////////////////////////////////
-
-/** @brief Routine for taking all connections in the matrix as strong */
-template<typename NumericT>
-void amg_influence_trivial(compressed_matrix<NumericT> const & A,
- viennacl::linalg::detail::amg::amg_level_context & amg_context,
- viennacl::linalg::amg_tag & tag)
-{
- (void)tag;
-
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx);
- viennacl::ocl::kernel & influence_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_influence_trivial");
-
- viennacl::ocl::enqueue(influence_kernel(A.handle1().opencl_handle(), A.handle2().opencl_handle(),
- cl_uint(A.size1()),
- cl_uint(A.nnz()),
- viennacl::traits::opencl_handle(amg_context.influence_jumper_),
- viennacl::traits::opencl_handle(amg_context.influence_ids_),
- viennacl::traits::opencl_handle(amg_context.influence_values_)
- )
- );
-}
-
-
-/** @brief Routine for extracting strongly connected points considering a user-provided threshold value */
-template<typename NumericT>
-void amg_influence_advanced(compressed_matrix<NumericT> const & A,
- viennacl::linalg::detail::amg::amg_level_context & amg_context,
- viennacl::linalg::amg_tag & tag)
-{
- (void)A; (void)amg_context; (void)tag;
- throw std::runtime_error("amg_influence_advanced() not implemented for OpenCL yet");
-}
-
-
-/** @brief Dispatcher for influence processing */
-template<typename NumericT>
-void amg_influence(compressed_matrix<NumericT> const & A,
- viennacl::linalg::detail::amg::amg_level_context & amg_context,
- viennacl::linalg::amg_tag & tag)
-{
- // TODO: dispatch based on influence tolerance provided
- amg_influence_trivial(A, amg_context, tag);
-}
-
-
-
-/** @brief Assign IDs to coarse points.
-*
-* TODO: Use exclusive_scan on GPU for this.
-*/
-inline void enumerate_coarse_points(viennacl::linalg::detail::amg::amg_level_context & amg_context)
-{
- viennacl::backend::typesafe_host_array<unsigned int> point_types(amg_context.point_types_.handle(), amg_context.point_types_.size());
- viennacl::backend::typesafe_host_array<unsigned int> coarse_ids(amg_context.coarse_id_.handle(), amg_context.coarse_id_.size());
- viennacl::backend::memory_read(amg_context.point_types_.handle(), 0, point_types.raw_size(), point_types.get());
- viennacl::backend::memory_read(amg_context.coarse_id_.handle(), 0, coarse_ids.raw_size(), coarse_ids.get());
-
- unsigned int coarse_id = 0;
- for (std::size_t i=0; i<amg_context.point_types_.size(); ++i)
- {
- coarse_ids.set(i, coarse_id);
- if (point_types[i] == viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_COARSE)
- ++coarse_id;
- }
-
- amg_context.num_coarse_ = coarse_id;
-
- viennacl::backend::memory_write(amg_context.coarse_id_.handle(), 0, coarse_ids.raw_size(), coarse_ids.get());
-}
-
-
-//////////////////////////////////////
-
-
-
-/** @brief AG (aggregation based) coarsening, single-threaded version of stage 1
-*
-* @param A Operator matrix on all levels
-* @param amg_context AMG hierarchy datastructures
-* @param tag AMG preconditioner tag
-*/
-template<typename NumericT>
-void amg_coarse_ag_stage1_mis2(compressed_matrix<NumericT> const & A,
- viennacl::linalg::detail::amg::amg_level_context & amg_context,
- viennacl::linalg::amg_tag & tag)
-{
- (void)tag;
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx);
-
- viennacl::vector<unsigned int> random_weights(A.size1(), viennacl::context(viennacl::MAIN_MEMORY));
- unsigned int *random_weights_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(random_weights.handle());
- for (std::size_t i=0; i<random_weights.size(); ++i)
- random_weights_ptr[i] = static_cast<unsigned int>(rand()) % static_cast<unsigned int>(A.size1());
- random_weights.switch_memory_context(viennacl::traits::context(A));
-
- // work vectors:
- viennacl::vector<unsigned int> work_state(A.size1(), viennacl::traits::context(A));
- viennacl::vector<unsigned int> work_random(A.size1(), viennacl::traits::context(A));
- viennacl::vector<unsigned int> work_index(A.size1(), viennacl::traits::context(A));
-
- viennacl::vector<unsigned int> work_state2(A.size1(), viennacl::traits::context(A));
- viennacl::vector<unsigned int> work_random2(A.size1(), viennacl::traits::context(A));
- viennacl::vector<unsigned int> work_index2(A.size1(), viennacl::traits::context(A));
-
- unsigned int num_undecided = static_cast<unsigned int>(A.size1());
- viennacl::vector<unsigned int> undecided_buffer(256, viennacl::traits::context(A));
- viennacl::backend::typesafe_host_array<unsigned int> undecided_buffer_host(undecided_buffer.handle(), undecided_buffer.size());
-
- viennacl::ocl::kernel & init_workdata_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_init_workdata");
- viennacl::ocl::kernel & max_neighborhood_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_max_neighborhood");
- viennacl::ocl::kernel & mark_mis_nodes_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_mark_mis_nodes");
- viennacl::ocl::kernel & reset_state_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_reset_state");
-
- unsigned int pmis_iters = 0;
- while (num_undecided > 0)
- {
- ++pmis_iters;
-
- //
- // init temporary work data:
- //
- viennacl::ocl::enqueue(init_workdata_kernel(work_state, work_random, work_index,
- amg_context.point_types_,
- random_weights,
- cl_uint(A.size1())
- )
- );
-
- //
- // Propagate maximum tuple twice
- //
- for (unsigned int r = 0; r < 2; ++r)
- {
- // max operation
- viennacl::ocl::enqueue(max_neighborhood_kernel(work_state, work_random, work_index,
- work_state2, work_random2, work_index2,
- amg_context.influence_jumper_, amg_context.influence_ids_,
- cl_uint(A.size1())
- )
- );
-
- // copy work array (can be fused into a single kernel if needed. Previous kernel is in most cases sufficiently heavy)
- work_state = work_state2;
- work_random = work_random2;
- work_index = work_index2;
- }
-
- //
- // mark MIS and non-MIS nodes:
- //
- viennacl::ocl::enqueue(mark_mis_nodes_kernel(work_state, work_index,
- amg_context.point_types_,
- undecided_buffer,
- cl_uint(A.size1())
- )
- );
-
- // get number of undecided points on host:
- viennacl::backend::memory_read(undecided_buffer.handle(), 0, undecided_buffer_host.raw_size(), undecided_buffer_host.get());
- num_undecided = 0;
- for (std::size_t i=0; i<undecided_buffer.size(); ++i)
- num_undecided += undecided_buffer_host[i];
-
- } //while
-
- viennacl::ocl::enqueue(reset_state_kernel(amg_context.point_types_, cl_uint(amg_context.point_types_.size()) ) );
-}
-
-
-
-/** @brief AG (aggregation based) coarsening. Partially single-threaded version (VIENNACL_AMG_COARSE_AG)
-*
-* @param A Operator matrix
-* @param amg_context AMG hierarchy datastructures
-* @param tag AMG preconditioner tag
-*/
-template<typename NumericT>
-void amg_coarse_ag(compressed_matrix<NumericT> const & A,
- viennacl::linalg::detail::amg::amg_level_context & amg_context,
- viennacl::linalg::amg_tag & tag)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx);
-
- amg_influence_trivial(A, amg_context, tag);
-
- //
- // Stage 1: Build aggregates:
- //
- if (tag.get_coarsening_method() == viennacl::linalg::AMG_COARSENING_METHOD_MIS2_AGGREGATION)
- amg_coarse_ag_stage1_mis2(A, amg_context, tag);
- else
- throw std::runtime_error("Only MIS2 coarsening implemented. Selected coarsening not available with OpenCL backend!");
-
- viennacl::linalg::opencl::amg::enumerate_coarse_points(amg_context);
-
- //
- // Stage 2: Propagate coarse aggregate indices to neighbors:
- //
- viennacl::ocl::kernel & propagate_coarse_indices = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_agg_propagate_coarse_indices");
- viennacl::ocl::enqueue(propagate_coarse_indices(amg_context.point_types_,
- amg_context.coarse_id_,
- amg_context.influence_jumper_,
- amg_context.influence_ids_,
- cl_uint(A.size1())
- )
- );
-
- //
- // Stage 3: Merge remaining undecided points (merging to first aggregate found when cycling over the hierarchy
- //
- viennacl::ocl::kernel & merge_undecided = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_agg_merge_undecided");
- viennacl::ocl::enqueue(merge_undecided(amg_context.point_types_,
- amg_context.coarse_id_,
- amg_context.influence_jumper_,
- amg_context.influence_ids_,
- cl_uint(A.size1())
- )
- );
-
- //
- // Stage 4: Set undecided points to fine points (coarse ID already set in Stage 3)
- // Note: Stage 3 and Stage 4 were initially fused, but are now split in order to avoid race conditions (or a fallback to sequential execution).
- //
- viennacl::ocl::kernel & merge_undecided_2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_agg_merge_undecided_2");
- viennacl::ocl::enqueue(merge_undecided_2(amg_context.point_types_, cl_uint(A.size1()) ) );
-
-}
-
-
-
-
-/** @brief Calls the right coarsening procedure
-*
-* @param A Operator matrix on all levels
-* @param amg_context AMG hierarchy datastructures
-* @param tag AMG preconditioner tag
-*/
-template<typename InternalT1>
-void amg_coarse(InternalT1 & A,
- viennacl::linalg::detail::amg::amg_level_context & amg_context,
- viennacl::linalg::amg_tag & tag)
-{
- switch (tag.get_coarsening_method())
- {
- case viennacl::linalg::AMG_COARSENING_METHOD_MIS2_AGGREGATION: amg_coarse_ag(A, amg_context, tag); break;
- default: throw std::runtime_error("not implemented yet");
- }
-}
-
-
-
-
-////////////////////////////////////// Interpolation /////////////////////////////
-
-
-/** @brief AG (aggregation based) interpolation. Multi-Threaded! (VIENNACL_INTERPOL_SA)
- *
- * @param A Operator matrix
- * @param P Prolongation matrix
- * @param amg_context AMG hierarchy datastructures
- * @param tag AMG configuration tag
-*/
-template<typename NumericT>
-void amg_interpol_ag(compressed_matrix<NumericT> const & A,
- compressed_matrix<NumericT> & P,
- viennacl::linalg::detail::amg::amg_level_context & amg_context,
- viennacl::linalg::amg_tag & tag)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx);
-
- (void)tag;
- P = compressed_matrix<NumericT>(A.size1(), amg_context.num_coarse_, A.size1(), viennacl::traits::context(A));
-
- // build matrix here
- viennacl::ocl::kernel & interpolate_ag = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_interpol_ag");
- viennacl::ocl::enqueue(interpolate_ag(P.handle1().opencl_handle(),
- P.handle2().opencl_handle(),
- P.handle().opencl_handle(),
- amg_context.coarse_id_,
- cl_uint(A.size1())
- )
- );
-
- P.generate_row_block_information();
-}
-
-/** @brief Smoothed aggregation interpolation. (VIENNACL_INTERPOL_SA)
- *
- * @param A Operator matrix
- * @param P Prolongation matrix
- * @param amg_context AMG hierarchy datastructures
- * @param tag AMG configuration tag
-*/
-template<typename NumericT>
-void amg_interpol_sa(compressed_matrix<NumericT> const & A,
- compressed_matrix<NumericT> & P,
- viennacl::linalg::detail::amg::amg_level_context & amg_context,
- viennacl::linalg::amg_tag & tag)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx);
-
- (void)tag;
- viennacl::compressed_matrix<NumericT> P_tentative(A.size1(), amg_context.num_coarse_, A.size1(), viennacl::traits::context(A));
-
- // form tentative operator:
- amg_interpol_ag(A, P_tentative, amg_context, tag);
-
- viennacl::compressed_matrix<NumericT> Jacobi(A.size1(), A.size1(), A.nnz(), viennacl::traits::context(A));
-
- viennacl::ocl::kernel & interpol_sa = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_interpol_sa");
- viennacl::ocl::enqueue(interpol_sa(A.handle1().opencl_handle(),
- A.handle2().opencl_handle(),
- A.handle().opencl_handle(),
- cl_uint(A.size1()),
- cl_uint(A.nnz()),
- Jacobi.handle1().opencl_handle(),
- Jacobi.handle2().opencl_handle(),
- Jacobi.handle().opencl_handle(),
- NumericT(tag.get_jacobi_weight())
- )
- );
-
- P = viennacl::linalg::prod(Jacobi, P_tentative);
-
- P.generate_row_block_information();
-}
-
-/** @brief Dispatcher for building the interpolation matrix
- *
- * @param A Operator matrix
- * @param P Prolongation matrix
- * @param amg_context AMG hierarchy datastructures
- * @param tag AMG configuration tag
-*/
-template<typename MatrixT>
-void amg_interpol(MatrixT const & A,
- MatrixT & P,
- viennacl::linalg::detail::amg::amg_level_context & amg_context,
- viennacl::linalg::amg_tag & tag)
-{
- switch (tag.get_interpolation_method())
- {
- case viennacl::linalg::AMG_INTERPOLATION_METHOD_AGGREGATION: amg_interpol_ag (A, P, amg_context, tag); break;
- case viennacl::linalg::AMG_INTERPOLATION_METHOD_SMOOTHED_AGGREGATION: amg_interpol_sa (A, P, amg_context, tag); break;
- default: throw std::runtime_error("Not implemented yet!");
- }
-}
-
-/** Assign sparse matrix A to dense matrix B */
-template<typename NumericT, unsigned int AlignmentV>
-void assign_to_dense(viennacl::compressed_matrix<NumericT, AlignmentV> const & A,
- viennacl::matrix_base<NumericT> & B)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::compressed_matrix<NumericT>::init(ctx);
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<NumericT>::program_name(),
- "assign_to_dense");
-
- viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
- viennacl::traits::opencl_handle(B),
- cl_uint(viennacl::traits::start1(B)), cl_uint(viennacl::traits::start2(B)),
- cl_uint(viennacl::traits::stride1(B)), cl_uint(viennacl::traits::stride2(B)),
- cl_uint(viennacl::traits::size1(B)), cl_uint(viennacl::traits::size2(B)),
- cl_uint(viennacl::traits::internal_size1(B)), cl_uint(viennacl::traits::internal_size2(B)) ));
-
-}
-
-/** @brief Jacobi Smoother (OpenCL version)
-*
-* @param iterations Number of smoother iterations
-* @param A Operator matrix for the smoothing
-* @param x The vector smoothing is applied to
-* @param x_backup (Different) Vector holding the same values as x
-* @param rhs_smooth The right hand side of the equation for the smoother
-* @param weight Damping factor. 0: No effect of smoother. 1: Undamped Jacobi iteration
-*/
-template<typename NumericT>
-void smooth_jacobi(unsigned int iterations,
- compressed_matrix<NumericT> const & A,
- vector<NumericT> & x,
- vector<NumericT> & x_backup,
- vector<NumericT> const & rhs_smooth,
- NumericT weight)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(x).context());
- viennacl::linalg::opencl::kernels::compressed_matrix<NumericT>::init(ctx);
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<NumericT>::program_name(), "jacobi");
-
- for (unsigned int i=0; i<iterations; ++i)
- {
- x_backup = x;
-
- viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
- static_cast<NumericT>(weight),
- viennacl::traits::opencl_handle(x_backup),
- viennacl::traits::opencl_handle(x),
- viennacl::traits::opencl_handle(rhs_smooth),
- static_cast<cl_uint>(rhs_smooth.size())));
-
- }
-}
-
-
-} //namespace amg
-} //namespace host_based
-} //namespace linalg
-} //namespace viennacl
-
-#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/bisect_kernel_calls.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/bisect_kernel_calls.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/bisect_kernel_calls.hpp
deleted file mode 100644
index 2fcd6fa..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/bisect_kernel_calls.hpp
+++ /dev/null
@@ -1,177 +0,0 @@
-#ifndef VIENNACL_LINALG_OPENCL_BISECT_KERNEL_CALLS_HPP_
-#define VIENNACL_LINALG_OPENCL_BISECT_KERNEL_CALLS_HPP_
-
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-
-/** @file viennacl/linalg/opencl/bisect_kernel_calls.hpp
- @brief OpenCL kernel calls for the bisection algorithm
-
- Implementation based on the sample provided with the CUDA 6.0 SDK, for which
- the creation of derivative works is allowed by including the following statement:
- "This software contains source code provided by NVIDIA Corporation."
-*/
-
-// includes, project
-#include "viennacl/linalg/opencl/kernels/bisect.hpp"
-#include "viennacl/linalg/detail/bisect/structs.hpp"
-#include "viennacl/linalg/detail/bisect/config.hpp"
-#include "viennacl/linalg/detail/bisect/util.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace opencl
-{
-const std::string BISECT_KERNEL_SMALL = "bisectKernelSmall";
-const std::string BISECT_KERNEL_LARGE = "bisectKernelLarge";
-const std::string BISECT_KERNEL_LARGE_ONE_INTERVALS = "bisectKernelLarge_OneIntervals";
-const std::string BISECT_KERNEL_LARGE_MULT_INTERVALS = "bisectKernelLarge_MultIntervals";
-
-template<typename NumericT>
-void bisectSmall(const viennacl::linalg::detail::InputData<NumericT> &input,
- viennacl::linalg::detail::ResultDataSmall<NumericT> &result,
- const unsigned int mat_size,
- const NumericT lg, const NumericT ug,
- const NumericT precision)
- {
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context());
- viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::init(ctx);
-
- viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::program_name(), BISECT_KERNEL_SMALL);
- kernel.global_work_size(0, 1 * VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX);
- kernel.local_work_size(0, VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX);
-
- viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a),
- viennacl::traits::opencl_handle(input.g_b),
- static_cast<cl_uint>(mat_size),
- viennacl::traits::opencl_handle(result.vcl_g_left),
- viennacl::traits::opencl_handle(result.vcl_g_right),
- viennacl::traits::opencl_handle(result.vcl_g_left_count),
- viennacl::traits::opencl_handle(result.vcl_g_right_count),
- static_cast<NumericT>(lg),
- static_cast<NumericT>(ug),
- static_cast<cl_uint>(0),
- static_cast<cl_uint>(mat_size),
- static_cast<NumericT>(precision)
- ));
-
- }
-
-template<typename NumericT>
-void bisectLarge(const viennacl::linalg::detail::InputData<NumericT> &input,
- viennacl::linalg::detail::ResultDataLarge<NumericT> &result,
- const unsigned int mat_size,
- const NumericT lg, const NumericT ug,
- const NumericT precision)
- {
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context());
- viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::init(ctx);
-
- viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::program_name(), BISECT_KERNEL_LARGE);
- kernel.global_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2); // Use only 128 threads for 256 < n <= 512, this
- kernel.local_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2); // is reasoned
-
- viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a),
- viennacl::traits::opencl_handle(input.g_b),
- static_cast<cl_uint>(mat_size),
- static_cast<NumericT>(lg),
- static_cast<NumericT>(ug),
- static_cast<cl_uint>(0),
- static_cast<cl_uint>(mat_size),
- static_cast<NumericT>(precision),
- viennacl::traits::opencl_handle(result.g_num_one),
- viennacl::traits::opencl_handle(result.g_num_blocks_mult),
- viennacl::traits::opencl_handle(result.g_left_one),
- viennacl::traits::opencl_handle(result.g_right_one),
- viennacl::traits::opencl_handle(result.g_pos_one),
- viennacl::traits::opencl_handle(result.g_left_mult),
- viennacl::traits::opencl_handle(result.g_right_mult),
- viennacl::traits::opencl_handle(result.g_left_count_mult),
- viennacl::traits::opencl_handle(result.g_right_count_mult),
- viennacl::traits::opencl_handle(result.g_blocks_mult),
- viennacl::traits::opencl_handle(result.g_blocks_mult_sum)
- ));
-
- }
-
-template<typename NumericT>
-void bisectLargeOneIntervals(const viennacl::linalg::detail::InputData<NumericT> &input,
- viennacl::linalg::detail::ResultDataLarge<NumericT> &result,
- const unsigned int mat_size,
- const NumericT precision)
- {
- unsigned int num_one_intervals = result.g_num_one;
- unsigned int num_blocks = viennacl::linalg::detail::getNumBlocksLinear(num_one_intervals,
- mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK: VIENNACL_BISECT_MAX_THREADS_BLOCK / 2);
-
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context());
- viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::init(ctx);
-
- viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::program_name(), BISECT_KERNEL_LARGE_ONE_INTERVALS);
- kernel.global_work_size(0, num_blocks * (mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2));
- kernel.local_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2);
-
- viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a),
- viennacl::traits::opencl_handle(input.g_b),
- static_cast<cl_uint>(mat_size),
- static_cast<cl_uint>(num_one_intervals),
- viennacl::traits::opencl_handle(result.g_left_one),
- viennacl::traits::opencl_handle(result.g_right_one),
- viennacl::traits::opencl_handle(result.g_pos_one),
- static_cast<NumericT>(precision)
- ));
- }
-
-
-template<typename NumericT>
-void bisectLargeMultIntervals(const viennacl::linalg::detail::InputData<NumericT> &input,
- viennacl::linalg::detail::ResultDataLarge<NumericT> &result,
- const unsigned int mat_size,
- const NumericT precision)
- {
- unsigned int num_blocks_mult = result.g_num_blocks_mult;
-
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context());
- viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::init(ctx);
-
- viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::program_name(), BISECT_KERNEL_LARGE_MULT_INTERVALS);
- kernel.global_work_size(0, num_blocks_mult * (mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2));
- kernel.local_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2);
-
- viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a),
- viennacl::traits::opencl_handle(input.g_b),
- static_cast<cl_uint>(mat_size),
- viennacl::traits::opencl_handle(result.g_blocks_mult),
- viennacl::traits::opencl_handle(result.g_blocks_mult_sum),
- viennacl::traits::opencl_handle(result.g_left_mult),
- viennacl::traits::opencl_handle(result.g_right_mult),
- viennacl::traits::opencl_handle(result.g_left_count_mult),
- viennacl::traits::opencl_handle(result.g_right_count_mult),
- viennacl::traits::opencl_handle(result.g_lambda_mult),
- viennacl::traits::opencl_handle(result.g_pos_mult),
- static_cast<NumericT>(precision)
- ));
- }
-} // namespace opencl
-} // namespace linalg
-} // namespace viennacl
-
-#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/common.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/common.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/common.hpp
deleted file mode 100644
index d6a288b..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/common.hpp
+++ /dev/null
@@ -1,102 +0,0 @@
-#ifndef VIENNACL_LINALG_OPENCL_COMMON_HPP_
-#define VIENNACL_LINALG_OPENCL_COMMON_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/opencl/common.hpp
- @brief Common implementations shared by OpenCL-based operations
-*/
-
-#include <cmath>
-
-#include "viennacl/forwards.h"
-#include "viennacl/ocl/platform.hpp"
-#include "viennacl/traits/handle.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace opencl
-{
-namespace detail
-{
-
-
-
-inline cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
-{
- return static_cast<cl_uint>( ((length > 1) ? (cl_uint(length) << 2) : 0) + (reciprocal ? 2 : 0) + (flip_sign ? 1 : 0) );
-}
-
-
-/** @brief Returns the OpenCL kernel string for the operation C = A * B with A sparse, B, C dense matrices. */
-inline std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major)
-{
- if (B_transposed)
- {
- if (B_row_major && C_row_major)
- return "trans_mat_mult_row_row";
- if (B_row_major && !C_row_major)
- return "trans_mat_mult_row_col";
- if (!B_row_major && C_row_major)
- return "trans_mat_mult_col_row";
-
- return "trans_mat_mult_col_col";
- }
-
- if (B_row_major && C_row_major)
- return "mat_mult_row_row";
- if (B_row_major && !C_row_major)
- return "mat_mult_row_col";
- if (!B_row_major && C_row_major)
- return "mat_mult_col_row";
-
- return "mat_mult_col_col";
-}
-
-
-
-template<typename SomeT>
-ocl::device const & current_device(SomeT const & obj) { return traits::opencl_handle(obj).context().current_device(); }
-
-inline std::string op_to_string(op_abs) { return "abs"; }
-inline std::string op_to_string(op_acos) { return "acos"; }
-inline std::string op_to_string(op_asin) { return "asin"; }
-inline std::string op_to_string(op_atan) { return "atan"; }
-inline std::string op_to_string(op_ceil) { return "ceil"; }
-inline std::string op_to_string(op_cos) { return "cos"; }
-inline std::string op_to_string(op_cosh) { return "cosh"; }
-inline std::string op_to_string(op_exp) { return "exp"; }
-inline std::string op_to_string(op_fabs) { return "fabs"; }
-inline std::string op_to_string(op_floor) { return "floor"; }
-inline std::string op_to_string(op_log) { return "log"; }
-inline std::string op_to_string(op_log10) { return "log10"; }
-inline std::string op_to_string(op_sin) { return "sin"; }
-inline std::string op_to_string(op_sinh) { return "sinh"; }
-inline std::string op_to_string(op_sqrt) { return "sqrt"; }
-inline std::string op_to_string(op_tan) { return "tan"; }
-inline std::string op_to_string(op_tanh) { return "tanh"; }
-
-} //namespace detail
-} //namespace opencl
-} //namespace linalg
-} //namespace viennacl
-
-
-#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/direct_solve.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/direct_solve.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/direct_solve.hpp
deleted file mode 100644
index 76874b1..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/direct_solve.hpp
+++ /dev/null
@@ -1,153 +0,0 @@
-#ifndef VIENNACL_LINALG_OPENCL_DIRECT_SOLVE_HPP
-#define VIENNACL_LINALG_OPENCL_DIRECT_SOLVE_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/opencl/direct_solve.hpp
- @brief Implementations of dense direct solvers are found here.
-*/
-
-#include "viennacl/vector.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/ocl/kernel.hpp"
-#include "viennacl/ocl/device.hpp"
-#include "viennacl/ocl/handle.hpp"
-#include "viennacl/linalg/opencl/kernels/matrix_solve.hpp"
-#include "viennacl/linalg/opencl/kernels/matrix.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace opencl
-{
-
-namespace detail
-{
- inline cl_uint get_option_for_solver_tag(viennacl::linalg::upper_tag) { return 0; }
- inline cl_uint get_option_for_solver_tag(viennacl::linalg::unit_upper_tag) { return (1 << 0); }
- inline cl_uint get_option_for_solver_tag(viennacl::linalg::lower_tag) { return (1 << 2); }
- inline cl_uint get_option_for_solver_tag(viennacl::linalg::unit_lower_tag) { return (1 << 2) | (1 << 0); }
-
- template<typename MatrixT1, typename MatrixT2, typename KernelT>
- void inplace_solve_impl(MatrixT1 const & A, MatrixT2 & B, KernelT & k)
- {
- viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A),
- cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
- cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
- cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
- cl_uint(viennacl::traits::internal_size1(A)), cl_uint(viennacl::traits::internal_size2(A)),
- viennacl::traits::opencl_handle(B),
- cl_uint(viennacl::traits::start1(B)), cl_uint(viennacl::traits::start2(B)),
- cl_uint(viennacl::traits::stride1(B)), cl_uint(viennacl::traits::stride2(B)),
- cl_uint(viennacl::traits::size1(B)), cl_uint(viennacl::traits::size2(B)),
- cl_uint(viennacl::traits::internal_size1(B)), cl_uint(viennacl::traits::internal_size2(B))
- )
- );
- }
-}
-
-
-//
-// Note: By convention, all size checks are performed in the calling frontend. No need to double-check here.
-//
-
-////////////////// upper triangular solver (upper_tag) //////////////////////////////////////
-/** @brief Direct inplace solver for dense triangular systems. Matlab notation: A \ B
-*
-* @param A The system matrix
-* @param B The matrix of row vectors, where the solution is directly written to
-*/
-template<typename NumericT, typename SolverTagT>
-void inplace_solve(matrix_base<NumericT> const & A,
- matrix_base<NumericT> & B,
- SolverTagT)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
-
- std::string program_name;
- if (A.row_major() && B.row_major())
- {
- typedef viennacl::linalg::opencl::kernels::matrix_solve<NumericT, row_major, row_major> KernelClass;
- KernelClass::init(ctx);
- program_name = KernelClass::program_name();
- }
- else if (A.row_major() && !B.row_major())
- {
- typedef viennacl::linalg::opencl::kernels::matrix_solve<NumericT, row_major, column_major> KernelClass;
- KernelClass::init(ctx);
- program_name = KernelClass::program_name();
- }
- else if (!A.row_major() && B.row_major())
- {
- typedef viennacl::linalg::opencl::kernels::matrix_solve<NumericT, column_major, row_major> KernelClass;
- KernelClass::init(ctx);
- program_name = KernelClass::program_name();
- }
- else
- {
- typedef viennacl::linalg::opencl::kernels::matrix_solve<NumericT, column_major, column_major> KernelClass;
- KernelClass::init(ctx);
- program_name = KernelClass::program_name();
- }
-
- std::stringstream ss;
- ss << SolverTagT::name();
- ss << "_solve";
-
- viennacl::ocl::kernel & k = ctx.get_kernel(program_name, ss.str());
-
- k.global_work_size(0, B.size2() * k.local_work_size());
- detail::inplace_solve_impl(A, B, k);
-}
-
-
-
-//
-// Solve on vector
-//
-
-template<typename NumericT, typename SOLVERTAG>
-void inplace_solve(matrix_base<NumericT> const & A,
- vector_base<NumericT> & x,
- SOLVERTAG)
-{
- cl_uint options = detail::get_option_for_solver_tag(SOLVERTAG());
-
- viennacl::ocl::kernel & k = detail::legacy_kernel_for_matrix(A, "triangular_substitute_inplace");
-
- k.global_work_size(0, k.local_work_size());
- viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A),
- cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
- cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)),
- cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
- cl_uint(viennacl::traits::internal_size1(A)), cl_uint(viennacl::traits::internal_size2(A)),
- viennacl::traits::opencl_handle(x),
- cl_uint(viennacl::traits::start(x)),
- cl_uint(viennacl::traits::stride(x)),
- cl_uint(viennacl::traits::size(x)),
- options
- )
- );
-}
-
-} //namespace opencl
-} //namespace linalg
-} //namespace viennacl
-
-#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/fft_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/fft_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/fft_operations.hpp
deleted file mode 100644
index a7b12b3..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/fft_operations.hpp
+++ /dev/null
@@ -1,350 +0,0 @@
-#ifndef VIENNACL_LINALG_OPENCL_FFT_OPERATIONS_HPP_
-#define VIENNACL_LINALG_OPENCL_FFT_OPERATIONS_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
- ============================================================================= */
-
-/** @file viennacl/linalg/opencl/fft_operations.hpp
- @brief Implementations of Fast Furier Transformation using OpenCL
- */
-
-#include "viennacl/forwards.h"
-#include "viennacl/ocl/device.hpp"
-#include "viennacl/ocl/kernel.hpp"
-#include "viennacl/traits/handle.hpp"
-#include "viennacl/traits/stride.hpp"
-#include "viennacl/linalg/host_based/fft_operations.hpp"
-#include "viennacl/linalg/opencl/kernels/fft.hpp"
-#include "viennacl/linalg/opencl/kernels/matrix.hpp"
-
-#include <viennacl/vector.hpp>
-#include <viennacl/matrix.hpp>
-
-#include <cmath>
-#include <stdexcept>
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-namespace fft
-{
-
- const vcl_size_t MAX_LOCAL_POINTS_NUM = 512;
-
- /**
- * @brief Get number of bits
- */
- inline vcl_size_t num_bits(vcl_size_t size)
- {
- vcl_size_t bits_datasize = 0;
- vcl_size_t ds = 1;
-
- while (ds < size)
- {
- ds = ds << 1;
- bits_datasize++;
- }
-
- return bits_datasize;
- }
-
- /**
- * @brief Find next power of two
- */
- inline vcl_size_t next_power_2(vcl_size_t n)
- {
- n = n - 1;
-
- vcl_size_t power = 1;
-
- while (power < sizeof(vcl_size_t) * 8)
- {
- n = n | (n >> power);
- power *= 2;
- }
-
- return n + 1;
- }
-
-} //namespce fft
-} //namespace detail
-
-namespace opencl
-{
-
-/**
- * @brief Direct algorithm for computing Fourier transformation.
- *
- * Works on any sizes of data.
- * Serial implementation has o(n^2) complexity
- */
-template<typename NumericT>
-void direct(viennacl::ocl::handle<cl_mem> const & in,
- viennacl::ocl::handle<cl_mem> const & out,
- vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign = NumericT(-1),
- viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- std::string program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::program_name();
- if (data_order == viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR)
- {
- viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::init(ctx);
- program_string =
- viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::program_name();
- } else
- viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::init(ctx);
-
- viennacl::ocl::kernel & k = ctx.get_kernel(program_string, "fft_direct");
- viennacl::ocl::enqueue(k(in, out,
- static_cast<cl_uint>(size),
- static_cast<cl_uint>(stride),
- static_cast<cl_uint>(batch_num),
- sign)
- );
-}
-
-/*
- * This function performs reorder of input data. Indexes are sorted in bit-reversal order.
- * Such reordering should be done before in-place FFT.
- */
-template<typename NumericT>
-void reorder(viennacl::ocl::handle<cl_mem> const & in,
- vcl_size_t size, vcl_size_t stride,
- vcl_size_t bits_datasize, vcl_size_t batch_num,
- viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- std::string program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::program_name();
- if (data_order == viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR)
- {
- viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::init(ctx);
- program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::program_name();
- } else
- viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::init(ctx);
-
- viennacl::ocl::kernel& k = ctx.get_kernel(program_string, "fft_reorder");
- viennacl::ocl::enqueue(k(in,
- static_cast<cl_uint>(bits_datasize), static_cast<cl_uint>(size),
- static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num))
- );
-}
-
-/**
- * @brief Radix-2 algorithm for computing Fourier transformation.
- *
- * Works only on power-of-two sizes of data.
- * Serial implementation has o(n * lg n) complexity.
- * This is a Cooley-Tukey algorithm
- */
-template<typename NumericT>
-void radix2(viennacl::ocl::handle<cl_mem> const & in,
- vcl_size_t size, vcl_size_t stride,
- vcl_size_t batch_num, NumericT sign = NumericT(-1),
- viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- assert(batch_num != 0 && bool("batch_num must be larger than 0"));
-
- std::string program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::program_name();
- if (data_order == viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR)
- {
- viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::init(ctx);
- program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::program_name();
- } else
- viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::init(ctx);
-
- vcl_size_t bits_datasize = viennacl::linalg::detail::fft::num_bits(size);
- if (size <= viennacl::linalg::detail::fft::MAX_LOCAL_POINTS_NUM)
- {
- viennacl::ocl::kernel & k = ctx.get_kernel(program_string, "fft_radix2_local");
- viennacl::ocl::enqueue(k(in,
- viennacl::ocl::local_mem((size * 4) * sizeof(NumericT)),
- static_cast<cl_uint>(bits_datasize), static_cast<cl_uint>(size),
- static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num), sign));
-
- }
- else
- {
- viennacl::linalg::opencl::reorder<NumericT>(in, size, stride, bits_datasize, batch_num);
-
- for (vcl_size_t step = 0; step < bits_datasize; step++)
- {
- viennacl::ocl::kernel & k = ctx.get_kernel(program_string, "fft_radix2");
- viennacl::ocl::enqueue(k(in,
- static_cast<cl_uint>(step), static_cast<cl_uint>(bits_datasize),
- static_cast<cl_uint>(size), static_cast<cl_uint>(stride),
- static_cast<cl_uint>(batch_num), sign));
- }
- }
-}
-
-/**
- * @brief Bluestein's algorithm for computing Fourier transformation.
- *
- * Currently, Works only for sizes of input data which less than 2^16.
- * Uses a lot of additional memory, but should be fast for any size of data.
- * Serial implementation has something about o(n * lg n) complexity
- */
-template<typename NumericT, unsigned int AlignmentV>
-void bluestein(viennacl::vector<NumericT, AlignmentV>& in,
- viennacl::vector<NumericT, AlignmentV>& out, vcl_size_t /*batch_num*/)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- vcl_size_t size = in.size() >> 1;
- vcl_size_t ext_size = viennacl::linalg::detail::fft::next_power_2(2 * size - 1);
-
- viennacl::vector<NumericT, AlignmentV> A(ext_size << 1);
- viennacl::vector<NumericT, AlignmentV> B(ext_size << 1);
- viennacl::vector<NumericT, AlignmentV> Z(ext_size << 1);
-
- {
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "zero2");
- viennacl::ocl::enqueue(k(A, B, static_cast<cl_uint>(ext_size)));
- }
- {
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "bluestein_pre");
- viennacl::ocl::enqueue(k(in, A, B, static_cast<cl_uint>(size), static_cast<cl_uint>(ext_size)));
- }
-
- viennacl::linalg::convolve_i(A, B, Z);
-
- {
- viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "bluestein_post");
- viennacl::ocl::enqueue(k(Z, out, static_cast<cl_uint>(size)));
- }
-}
-
-/**
- * @brief Mutiply two complex vectors and store result in output
- */
-template<typename NumericT, unsigned int AlignmentV>
-void multiply_complex(viennacl::vector<NumericT, AlignmentV> const & input1,
- viennacl::vector<NumericT, AlignmentV> const & input2,
- viennacl::vector<NumericT, AlignmentV> & output)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input1).context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
- vcl_size_t size = input1.size() >> 1;
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "fft_mult_vec");
- viennacl::ocl::enqueue(k(input1, input2, output, static_cast<cl_uint>(size)));
-}
-
-/**
- * @brief Normalize vector on with his own size
- */
-template<typename NumericT, unsigned int AlignmentV>
-void normalize(viennacl::vector<NumericT, AlignmentV> & input)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "fft_div_vec_scalar");
-
- vcl_size_t size = input.size() >> 1;
- NumericT norm_factor = static_cast<NumericT>(size);
- viennacl::ocl::enqueue(k(input, static_cast<cl_uint>(size), norm_factor));
-}
-
-/**
- * @brief Inplace_transpose matrix
- */
-template<typename NumericT, unsigned int AlignmentV>
-void transpose(viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> & input)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "transpose_inplace");
- viennacl::ocl::enqueue(k(input, static_cast<cl_uint>(input.internal_size1() >> 1),
- static_cast<cl_uint>(input.internal_size2()) >> 1));
-}
-
-/**
- * @brief Transpose matrix
- */
-template<typename NumericT, unsigned int AlignmentV>
-void transpose(viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> const & input,
- viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> & output)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "transpose");
- viennacl::ocl::enqueue(k(input, output, static_cast<cl_uint>(input.internal_size1() >> 1),
- static_cast<cl_uint>(input.internal_size2() >> 1)));
-}
-
-/**
- * @brief Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)
- */
-template<typename NumericT>
-void real_to_complex(viennacl::vector_base<NumericT> const & in,
- viennacl::vector_base<NumericT> & out, vcl_size_t size)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "real_to_complex");
- viennacl::ocl::enqueue(k(in, out, static_cast<cl_uint>(size)));
-}
-
-/**
- * @brief Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)
- */
-template<typename NumericT>
-void complex_to_real(viennacl::vector_base<NumericT> const & in,
- viennacl::vector_base<NumericT> & out, vcl_size_t size)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "complex_to_real");
- viennacl::ocl::enqueue(k(in, out, static_cast<cl_uint>(size)));
-}
-
-/**
- * @brief Reverse vector to oposite order and save it in input vector
- */
-template<typename NumericT>
-void reverse(viennacl::vector_base<NumericT>& in)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
- viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx);
-
- vcl_size_t size = in.size();
-
- viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "reverse_inplace");
- viennacl::ocl::enqueue(k(in, static_cast<cl_uint>(size)));
-}
-
-} //namespace opencl
-} //namespace linalg
-} //namespace viennacl
-
-#endif /* FFT_OPERATIONS_HPP_ */
-
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/ilu_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/ilu_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/ilu_operations.hpp
deleted file mode 100644
index 248a88a..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/ilu_operations.hpp
+++ /dev/null
@@ -1,260 +0,0 @@
-#ifndef VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_
-#define VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/opencl/ilu_operations.hpp
- @brief Implementations of specialized routines for the Chow-Patel parallel ILU preconditioner using OpenCL
-*/
-
-#include <cmath>
-#include <algorithm> //for std::max and std::min
-
-#include "viennacl/forwards.h"
-#include "viennacl/scalar.hpp"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/opencl/common.hpp"
-#include "viennacl/linalg/opencl/kernels/ilu.hpp"
-#include "viennacl/meta/predicate.hpp"
-#include "viennacl/meta/enable_if.hpp"
-#include "viennacl/traits/size.hpp"
-#include "viennacl/traits/start.hpp"
-#include "viennacl/traits/stride.hpp"
-#include "viennacl/linalg/vector_operations.hpp"
-
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace opencl
-{
-
-/////////////////////// ICC /////////////////////
-
-template<typename NumericT>
-void extract_L(compressed_matrix<NumericT> const & A,
- compressed_matrix<NumericT> & L)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
-
- //
- // Step 1: Count elements in L:
- //
- viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_L_1");
-
- viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()),
- L.handle1().opencl_handle())
- );
-
- //
- // Step 2: Exclusive scan on row_buffers:
- //
- viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1);
- viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
- L.reserve(wrapped_L_row_buffer[L.size1()], false);
-
-
- //
- // Step 3: Write entries
- //
- viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_L_2");
-
- viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()),
- L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle())
- );
-
- L.generate_row_block_information();
-
-} // extract_LU
-
-///////////////////////////////////////////////
-
-
-
-/** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. */
-template<typename NumericT>
-void icc_scale(compressed_matrix<NumericT> const & A,
- compressed_matrix<NumericT> & L)
-{
- viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));
-
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
-
- // fill D:
- viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1");
- viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) );
-
- // scale L:
- viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2");
- viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) );
-
-}
-
-/////////////////////////////////////
-
-
-/** @brief Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenCL (cf. Algorithm 2 in paper) */
-template<typename NumericT>
-void icc_chow_patel_sweep(compressed_matrix<NumericT> & L,
- vector<NumericT> const & aij_L)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
- viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
-
- viennacl::backend::mem_handle L_backup;
- viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L));
- viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());
-
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "icc_chow_patel_sweep_kernel");
- viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()),
- aij_L)
- );
-
-}
-
-
-/////////////////////// ILU /////////////////////
-
-template<typename NumericT>
-void extract_LU(compressed_matrix<NumericT> const & A,
- compressed_matrix<NumericT> & L,
- compressed_matrix<NumericT> & U)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
-
- //
- // Step 1: Count elements in L and U:
- //
- viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_LU_1");
-
- viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()),
- L.handle1().opencl_handle(),
- U.handle1().opencl_handle())
- );
-
- //
- // Step 2: Exclusive scan on row_buffers:
- //
- viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1);
- viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
- L.reserve(wrapped_L_row_buffer[L.size1()], false);
-
- viennacl::vector_base<unsigned int> wrapped_U_row_buffer(U.handle1(), A.size1() + 1, 0, 1);
- viennacl::linalg::exclusive_scan(wrapped_U_row_buffer, wrapped_U_row_buffer);
- U.reserve(wrapped_U_row_buffer[U.size1()], false);
-
- //
- // Step 3: Write entries
- //
- viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_LU_2");
-
- viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()),
- L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
- U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle())
- );
-
- L.generate_row_block_information();
- // Note: block information for U will be generated after transposition
-
-} // extract_LU
-
-///////////////////////////////////////////////
-
-
-
-/** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. */
-template<typename NumericT>
-void ilu_scale(compressed_matrix<NumericT> const & A,
- compressed_matrix<NumericT> & L,
- compressed_matrix<NumericT> & U)
-{
- viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));
-
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
- viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
-
- // fill D:
- viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1");
- viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) );
-
- // scale L:
- viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2");
- viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) );
-
- // scale U:
- viennacl::ocl::enqueue(k2(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(), cl_uint(A.size1()), D) );
-
-}
-
-/////////////////////////////////////
-
-
-/** @brief Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenCL (cf. Algorithm 2 in paper) */
-template<typename NumericT>
-void ilu_chow_patel_sweep(compressed_matrix<NumericT> & L,
- vector<NumericT> const & aij_L,
- compressed_matrix<NumericT> & U_trans,
- vector<NumericT> const & aij_U_trans)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
- viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
-
- viennacl::backend::mem_handle L_backup;
- viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L));
- viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());
-
- viennacl::backend::mem_handle U_backup;
- viennacl::backend::memory_create(U_backup, U_trans.handle().raw_size(), viennacl::traits::context(U_trans));
- viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size());
-
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_chow_patel_sweep_kernel");
- viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()),
- aij_L,
- U_trans.handle1().opencl_handle(), U_trans.handle2().opencl_handle(), U_trans.handle().opencl_handle(), U_backup.opencl_handle(),
- aij_U_trans)
- );
-
-}
-
-//////////////////////////////////////
-
-
-
-template<typename NumericT>
-void ilu_form_neumann_matrix(compressed_matrix<NumericT> & R,
- vector<NumericT> & diag_R)
-{
- viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(R).context());
- viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
-
- viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_form_neumann_matrix_kernel");
- viennacl::ocl::enqueue(k(R.handle1().opencl_handle(), R.handle2().opencl_handle(), R.handle().opencl_handle(), cl_uint(R.size1()),
- diag_R)
- );
-}
-
-} //namespace opencl
-} //namespace linalg
-} //namespace viennacl
-
-
-#endif