You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2016/06/08 21:40:37 UTC
[40/51] [partial] mahout git commit: (nojira) add native-viennaCL
module to codebase. closes apache/mahout#241
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp
new file mode 100644
index 0000000..0b81203
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp
@@ -0,0 +1,398 @@
+#ifndef VIENNACL_LINALG_AMG_HPP_
+#define VIENNACL_LINALG_AMG_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/amg.hpp
+ @brief Main include file for algebraic multigrid (AMG) preconditioners. Experimental.
+
+ Implementation contributed by Markus Wagner
+*/
+
+#include <vector>
+#include <cmath>
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/linalg/prod.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/compressed_matrix.hpp"
+
+#include "viennacl/linalg/detail/amg/amg_base.hpp"
+#include "viennacl/linalg/sparse_matrix_operations.hpp"
+#include "viennacl/linalg/amg_operations.hpp"
+#include "viennacl/tools/timer.hpp"
+#include "viennacl/linalg/direct_solve.hpp"
+#include "viennacl/linalg/lu.hpp"
+
+#include <map>
+
+#ifdef VIENNACL_WITH_OPENMP
+ #include <omp.h>
+#endif
+
+#define VIENNACL_AMG_MAX_LEVELS 20
+
+namespace viennacl
+{
+namespace linalg
+{
+
+class amg_coarse_problem_too_large_exception : public std::runtime_error
+{
+public:
+ amg_coarse_problem_too_large_exception(std::string const & msg, vcl_size_t num_points) : std::runtime_error(msg), c_points_(num_points) {}
+
+ /** @brief Returns the number of coarse points for which no further coarsening could be applied */
+ vcl_size_t coarse_points() const { return c_points_; }
+
+private:
+ vcl_size_t c_points_;
+};
+
+
+namespace detail
+{
+ /** @brief Sparse Galerkin product: Calculates A_coarse = trans(P)*A_fine*P = R*A_fine*P
+ *
+ * @param A_fine Operator matrix on fine grid (quadratic)
+ * @param P Prolongation/Interpolation matrix
+ * @param R Restriction matrix
+ * @param A_coarse Result matrix on coarse grid (Galerkin operator)
+ */
+ template<typename NumericT>
+ void amg_galerkin_prod(compressed_matrix<NumericT> & A_fine,
+ compressed_matrix<NumericT> & P,
+ compressed_matrix<NumericT> & R, //P^T
+ compressed_matrix<NumericT> & A_coarse)
+ {
+
+ compressed_matrix<NumericT> A_fine_times_P(viennacl::traits::context(A_fine));
+
+ // transpose P in memory (no known way of efficiently multiplying P^T * B for CSR-matrices P and B):
+ viennacl::linalg::detail::amg::amg_transpose(P, R);
+
+ // compute Galerkin product using a temporary for the result of A_fine * P
+ A_fine_times_P = viennacl::linalg::prod(A_fine, P);
+ A_coarse = viennacl::linalg::prod(R, A_fine_times_P);
+
+ }
+
+
+ /** @brief Setup AMG preconditioner
+ *
+ * @param list_of_A Operator matrices on all levels
+ * @param list_of_P Prolongation/Interpolation operators on all levels
+ * @param list_of_R Restriction operators on all levels
+ * @param list_of_amg_level_context Auxiliary datastructures for managing the grid hierarchy (coarse nodes, etc.)
+ * @param tag AMG preconditioner tag
+ */
+ template<typename NumericT, typename AMGContextListT>
+ vcl_size_t amg_setup(std::vector<compressed_matrix<NumericT> > & list_of_A,
+ std::vector<compressed_matrix<NumericT> > & list_of_P,
+ std::vector<compressed_matrix<NumericT> > & list_of_R,
+ AMGContextListT & list_of_amg_level_context,
+ amg_tag & tag)
+ {
+ // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process.
+ vcl_size_t iterations = tag.get_coarse_levels();
+ if (iterations == 0)
+ iterations = VIENNACL_AMG_MAX_LEVELS;
+
+ for (vcl_size_t i=0; i<iterations; ++i)
+ {
+ list_of_amg_level_context[i].switch_context(tag.get_setup_context());
+ list_of_amg_level_context[i].resize(list_of_A[i].size1(), list_of_A[i].nnz());
+
+ // Construct C and F points on coarse level (i is fine level, i+1 coarse level).
+ detail::amg::amg_coarse(list_of_A[i], list_of_amg_level_context[i], tag);
+
+ // Calculate number of C and F points on level i.
+ unsigned int c_points = list_of_amg_level_context[i].num_coarse_;
+ unsigned int f_points = static_cast<unsigned int>(list_of_A[i].size1()) - c_points;
+
+ if (f_points == 0 && c_points > tag.get_coarsening_cutoff())
+ {
+ std::stringstream ss;
+ ss << "No further coarsening possible (" << c_points << " coarse points). Consider changing the strong connection threshold or increasing the coarsening cutoff." << std::endl;
+ throw amg_coarse_problem_too_large_exception(ss.str(), c_points);
+ }
+
+ // Stop routine when the maximal coarse level is found (no C or F point). Coarsest level is level i.
+ if (c_points == 0 || f_points == 0)
+ break;
+
+ // Construct interpolation matrix for level i.
+ detail::amg::amg_interpol(list_of_A[i], list_of_P[i], list_of_amg_level_context[i], tag);
+
+ // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P).
+ amg_galerkin_prod(list_of_A[i], list_of_P[i], list_of_R[i], list_of_A[i+1]);
+
+ // send matrices to target context:
+ list_of_A[i].switch_memory_context(tag.get_target_context());
+ list_of_P[i].switch_memory_context(tag.get_target_context());
+ list_of_R[i].switch_memory_context(tag.get_target_context());
+
+ // If Limit of coarse points is reached then stop. Coarsest level is level i+1.
+ if (tag.get_coarse_levels() == 0 && c_points <= tag.get_coarsening_cutoff())
+ return i+1;
+ }
+
+ return iterations;
+ }
+
+
+ /** @brief Initialize AMG preconditioner
+ *
+ * @param mat System matrix
+ * @param list_of_A Operator matrices on all levels
+ * @param list_of_P Prolongation/Interpolation operators on all levels
+ * @param list_of_R Restriction operators on all levels
+ * @param list_of_amg_level_context Auxiliary datastructures for managing the grid hierarchy (coarse nodes, etc.)
+ * @param tag AMG preconditioner tag
+ */
+ template<typename MatrixT, typename InternalT1, typename InternalT2>
+ void amg_init(MatrixT const & mat, InternalT1 & list_of_A, InternalT1 & list_of_P, InternalT1 & list_of_R, InternalT2 & list_of_amg_level_context, amg_tag & tag)
+ {
+ typedef typename InternalT1::value_type SparseMatrixType;
+
+ vcl_size_t num_levels = (tag.get_coarse_levels() > 0) ? tag.get_coarse_levels() : VIENNACL_AMG_MAX_LEVELS;
+
+ list_of_A.resize(num_levels+1, SparseMatrixType(tag.get_setup_context()));
+ list_of_P.resize(num_levels, SparseMatrixType(tag.get_setup_context()));
+ list_of_R.resize(num_levels, SparseMatrixType(tag.get_setup_context()));
+ list_of_amg_level_context.resize(num_levels);
+
+ // Insert operator matrix as operator for finest level.
+ //SparseMatrixType A0(mat);
+ //A.insert_element(0, A0);
+ list_of_A[0].switch_memory_context(viennacl::traits::context(mat));
+ list_of_A[0] = mat;
+ list_of_A[0].switch_memory_context(tag.get_setup_context());
+ }
+
+ /** @brief Setup data structures for precondition phase for later use on the GPU
+ *
+ * @param result Result vector on all levels
+ * @param result_backup Copy of result vector on all levels
+ * @param rhs RHS vector on all levels
+ * @param residual Residual vector on all levels
+ * @param A Operators matrices on all levels from setup phase
+ * @param coarse_levels Number of coarse levels for which the datastructures should be set up.
+ * @param tag AMG preconditioner tag
+ */
+ template<typename InternalVectorT, typename SparseMatrixT>
+ void amg_setup_apply(InternalVectorT & result,
+ InternalVectorT & result_backup,
+ InternalVectorT & rhs,
+ InternalVectorT & residual,
+ SparseMatrixT const & A,
+ vcl_size_t coarse_levels,
+ amg_tag const & tag)
+ {
+ typedef typename InternalVectorT::value_type VectorType;
+
+ result.resize(coarse_levels + 1);
+ result_backup.resize(coarse_levels + 1);
+ rhs.resize(coarse_levels + 1);
+ residual.resize(coarse_levels);
+
+ for (vcl_size_t level=0; level <= coarse_levels; ++level)
+ {
+ result[level] = VectorType(A[level].size1(), tag.get_target_context());
+ result_backup[level] = VectorType(A[level].size1(), tag.get_target_context());
+ rhs[level] = VectorType(A[level].size1(), tag.get_target_context());
+ }
+ for (vcl_size_t level=0; level < coarse_levels; ++level)
+ {
+ residual[level] = VectorType(A[level].size1(), tag.get_target_context());
+ }
+ }
+
+
+ /** @brief Pre-compute LU factorization for direct solve (ublas library).
+ *
+ * Speeds up precondition phase as this is computed only once overall instead of once per iteration.
+ *
+ * @param op Operator matrix for direct solve
+ * @param A Operator matrix on coarsest level
+ * @param tag AMG preconditioner tag
+ */
+ template<typename NumericT, typename SparseMatrixT>
+ void amg_lu(viennacl::matrix<NumericT> & op,
+ SparseMatrixT const & A,
+ amg_tag const & tag)
+ {
+ op.switch_memory_context(tag.get_setup_context());
+ op.resize(A.size1(), A.size2(), false);
+ viennacl::linalg::detail::amg::assign_to_dense(A, op);
+
+ viennacl::linalg::lu_factorize(op);
+ op.switch_memory_context(tag.get_target_context());
+ }
+
+}
+
+/** @brief AMG preconditioner class, can be supplied to solve()-routines
+*/
+template<typename MatrixT>
+class amg_precond;
+
+
+/** @brief AMG preconditioner class, can be supplied to solve()-routines.
+*
+* Specialization for compressed_matrix
+*/
+template<typename NumericT, unsigned int AlignmentV>
+class amg_precond< compressed_matrix<NumericT, AlignmentV> >
+{
+ typedef viennacl::compressed_matrix<NumericT, AlignmentV> SparseMatrixType;
+ typedef viennacl::vector<NumericT> VectorType;
+ typedef detail::amg::amg_level_context AMGContextType;
+
+public:
+
+ amg_precond() {}
+
+ /** @brief The constructor. Builds data structures.
+ *
+ * @param mat System matrix
+ * @param tag The AMG tag
+ */
+ amg_precond(compressed_matrix<NumericT, AlignmentV> const & mat,
+ amg_tag const & tag)
+ {
+ tag_ = tag;
+
+ // Initialize data structures.
+ detail::amg_init(mat, A_list_, P_list_, R_list_, amg_context_list_, tag_);
+ }
+
+ /** @brief Start setup phase for this class and copy data structures.
+ */
+ void setup()
+ {
+ // Start setup phase.
+ vcl_size_t num_coarse_levels = detail::amg_setup(A_list_, P_list_, R_list_, amg_context_list_, tag_);
+
+ // Setup precondition phase (Data structures).
+ detail::amg_setup_apply(result_list_, result_backup_list_, rhs_list_, residual_list_, A_list_, num_coarse_levels, tag_);
+
+ // LU factorization for direct solve.
+ detail::amg_lu(coarsest_op_, A_list_[num_coarse_levels], tag_);
+ }
+
+
+ /** @brief Precondition Operation
+ *
+ * @param vec The vector to which preconditioning is applied to
+ */
+ template<typename VectorT>
+ void apply(VectorT & vec) const
+ {
+ vcl_size_t level;
+
+ // Precondition operation (Yang, p.3).
+ rhs_list_[0] = vec;
+
+ // Part 1: Restrict down to coarsest level
+ for (level=0; level < residual_list_.size(); level++)
+ {
+ result_list_[level].clear();
+
+ // Apply Smoother presmooth_ times.
+ viennacl::linalg::detail::amg::smooth_jacobi(static_cast<unsigned int>(tag_.get_presmooth_steps()),
+ A_list_[level],
+ result_list_[level],
+ result_backup_list_[level],
+ rhs_list_[level],
+ static_cast<NumericT>(tag_.get_jacobi_weight()));
+
+ // Compute residual.
+ //residual[level] = rhs_[level] - viennacl::linalg::prod(A_[level], result_[level]);
+ residual_list_[level] = viennacl::linalg::prod(A_list_[level], result_list_[level]);
+ residual_list_[level] = rhs_list_[level] - residual_list_[level];
+
+ // Restrict to coarse level. Result is RHS of coarse level equation.
+ //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]);
+ rhs_list_[level+1] = viennacl::linalg::prod(R_list_[level], residual_list_[level]);
+ }
+
+ // Part 2: On highest level use direct solve to solve equation (on the CPU)
+ result_list_[level] = rhs_list_[level];
+ viennacl::linalg::lu_substitute(coarsest_op_, result_list_[level]);
+
+ // Part 3: Prolongation to finest level
+ for (int level2 = static_cast<int>(residual_list_.size()-1); level2 >= 0; level2--)
+ {
+ level = static_cast<vcl_size_t>(level2);
+
+ // Interpolate error to fine level and correct solution.
+ result_backup_list_[level] = viennacl::linalg::prod(P_list_[level], result_list_[level+1]);
+ result_list_[level] += result_backup_list_[level];
+
+ // Apply Smoother postsmooth_ times.
+ viennacl::linalg::detail::amg::smooth_jacobi(static_cast<unsigned int>(tag_.get_postsmooth_steps()),
+ A_list_[level],
+ result_list_[level],
+ result_backup_list_[level],
+ rhs_list_[level],
+ static_cast<NumericT>(tag_.get_jacobi_weight()));
+ }
+ vec = result_list_[0];
+ }
+
+ /** @brief Returns the total number of multigrid levels in the hierarchy including the finest level. */
+ vcl_size_t levels() const { return residual_list_.size(); }
+
+
+ /** @brief Returns the problem/operator size at the respective multigrid level
+ *
+ * @param level Index of the multigrid level. 0 is the finest level, levels() - 1 is the coarsest level.
+ */
+ vcl_size_t size(vcl_size_t level) const
+ {
+ assert(level < levels() && bool("Level index out of bounds!"));
+ return residual_list_[level].size();
+ }
+
+ /** @brief Returns the associated preconditioner tag containing the configuration for the multigrid preconditioner. */
+ amg_tag const & tag() const { return tag_; }
+
+private:
+ std::vector<SparseMatrixType> A_list_;
+ std::vector<SparseMatrixType> P_list_;
+ std::vector<SparseMatrixType> R_list_;
+ std::vector<AMGContextType> amg_context_list_;
+
+ viennacl::matrix<NumericT> coarsest_op_;
+
+ mutable std::vector<VectorType> result_list_;
+ mutable std::vector<VectorType> result_backup_list_;
+ mutable std::vector<VectorType> rhs_list_;
+ mutable std::vector<VectorType> residual_list_;
+
+ amg_tag tag_;
+};
+
+}
+}
+
+
+
+#endif
+
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp
new file mode 100644
index 0000000..9c7f79f
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp
@@ -0,0 +1,238 @@
+#ifndef VIENNACL_LINALG_AMG_OPERATIONS_HPP_
+#define VIENNACL_LINALG_AMG_OPERATIONS_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the PDF manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/amg_operations.hpp
+ @brief Implementations of operations for algebraic multigrid
+*/
+
+#include "viennacl/forwards.h"
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/linalg/detail/amg/amg_base.hpp"
+#include "viennacl/linalg/host_based/amg_operations.hpp"
+
+#ifdef VIENNACL_WITH_OPENCL
+ #include "viennacl/linalg/opencl/amg_operations.hpp"
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+ #include "viennacl/linalg/cuda/amg_operations.hpp"
+#endif
+
+namespace viennacl
+{
+namespace linalg
+{
+namespace detail
+{
+namespace amg
+{
+
+template<typename NumericT, typename AMGContextT>
+void amg_influence(compressed_matrix<NumericT> const & A, AMGContextT & amg_context, amg_tag & tag)
+{
+ switch (viennacl::traits::handle(A).get_active_handle_id())
+ {
+ case viennacl::MAIN_MEMORY:
+ viennacl::linalg::host_based::amg::amg_influence(A, amg_context, tag);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case viennacl::OPENCL_MEMORY:
+ viennacl::linalg::opencl::amg::amg_influence(A, amg_context, tag);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case viennacl::CUDA_MEMORY:
+ viennacl::linalg::cuda::amg::amg_influence(A, amg_context, tag);
+ break;
+#endif
+ case viennacl::MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("not implemented");
+ }
+}
+
+
+template<typename NumericT, typename AMGContextT>
+void amg_coarse(compressed_matrix<NumericT> const & A, AMGContextT & amg_context, amg_tag & tag)
+{
+ switch (viennacl::traits::handle(A).get_active_handle_id())
+ {
+ case viennacl::MAIN_MEMORY:
+ viennacl::linalg::host_based::amg::amg_coarse(A, amg_context, tag);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case viennacl::OPENCL_MEMORY:
+ viennacl::linalg::opencl::amg::amg_coarse(A, amg_context, tag);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case viennacl::CUDA_MEMORY:
+ viennacl::linalg::cuda::amg::amg_coarse(A, amg_context, tag);
+ break;
+#endif
+ case viennacl::MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("not implemented");
+ }
+}
+
+
+template<typename NumericT, typename AMGContextT>
+void amg_interpol(compressed_matrix<NumericT> const & A,
+ compressed_matrix<NumericT> & P,
+ AMGContextT & amg_context,
+ amg_tag & tag)
+{
+ switch (viennacl::traits::handle(A).get_active_handle_id())
+ {
+ case viennacl::MAIN_MEMORY:
+ viennacl::linalg::host_based::amg::amg_interpol(A, P, amg_context, tag);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case viennacl::OPENCL_MEMORY:
+ viennacl::linalg::opencl::amg::amg_interpol(A, P, amg_context, tag);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case viennacl::CUDA_MEMORY:
+ viennacl::linalg::cuda::amg::amg_interpol(A, P, amg_context, tag);
+ break;
+#endif
+ case viennacl::MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("not implemented");
+ }
+}
+
+
+template<typename NumericT>
+void amg_transpose(compressed_matrix<NumericT> & A,
+ compressed_matrix<NumericT> & B)
+{
+ viennacl::context orig_ctx = viennacl::traits::context(A);
+ viennacl::context cpu_ctx(viennacl::MAIN_MEMORY);
+ (void)orig_ctx;
+ (void)cpu_ctx;
+
+ switch (viennacl::traits::handle(A).get_active_handle_id())
+ {
+ case viennacl::MAIN_MEMORY:
+ viennacl::linalg::host_based::amg::amg_transpose(A, B);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case viennacl::OPENCL_MEMORY:
+ A.switch_memory_context(cpu_ctx);
+ B.switch_memory_context(cpu_ctx);
+ viennacl::linalg::host_based::amg::amg_transpose(A, B);
+ A.switch_memory_context(orig_ctx);
+ B.switch_memory_context(orig_ctx);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case viennacl::CUDA_MEMORY:
+ A.switch_memory_context(cpu_ctx);
+ B.switch_memory_context(cpu_ctx);
+ viennacl::linalg::host_based::amg::amg_transpose(A, B);
+ A.switch_memory_context(orig_ctx);
+ B.switch_memory_context(orig_ctx);
+ //viennacl::linalg::cuda::amg_transpose(A, B);
+ break;
+#endif
+ case viennacl::MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("not implemented");
+ }
+}
+
+/** Assign sparse matrix A to dense matrix B */
+template<typename SparseMatrixType, typename NumericT>
+typename viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type
+assign_to_dense(SparseMatrixType const & A,
+ viennacl::matrix_base<NumericT> & B)
+{
+ assert( (A.size1() == B.size1()) && bool("Size check failed for assignment to dense matrix: size1(A) != size1(B)"));
+ assert( (A.size2() == B.size1()) && bool("Size check failed for assignment to dense matrix: size2(A) != size2(B)"));
+
+ switch (viennacl::traits::handle(A).get_active_handle_id())
+ {
+ case viennacl::MAIN_MEMORY:
+ viennacl::linalg::host_based::amg::assign_to_dense(A, B);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case viennacl::OPENCL_MEMORY:
+ viennacl::linalg::opencl::amg::assign_to_dense(A, B);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case viennacl::CUDA_MEMORY:
+ viennacl::linalg::cuda::amg::assign_to_dense(A, B);
+ break;
+#endif
+ case viennacl::MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("not implemented");
+ }
+}
+
+template<typename NumericT>
+void smooth_jacobi(unsigned int iterations,
+ compressed_matrix<NumericT> const & A,
+ vector<NumericT> & x,
+ vector<NumericT> & x_backup,
+ vector<NumericT> const & rhs_smooth,
+ NumericT weight)
+{
+ switch (viennacl::traits::handle(A).get_active_handle_id())
+ {
+ case viennacl::MAIN_MEMORY:
+ viennacl::linalg::host_based::amg::smooth_jacobi(iterations, A, x, x_backup, rhs_smooth, weight);
+ break;
+#ifdef VIENNACL_WITH_OPENCL
+ case viennacl::OPENCL_MEMORY:
+ viennacl::linalg::opencl::amg::smooth_jacobi(iterations, A, x, x_backup, rhs_smooth, weight);
+ break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+ case viennacl::CUDA_MEMORY:
+ viennacl::linalg::cuda::amg::smooth_jacobi(iterations, A, x, x_backup, rhs_smooth, weight);
+ break;
+#endif
+ case viennacl::MEMORY_NOT_INITIALIZED:
+ throw memory_exception("not initialised!");
+ default:
+ throw memory_exception("not implemented");
+ }
+}
+
+} //namespace amg
+} //namespace detail
+} //namespace linalg
+} //namespace viennacl
+
+
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp
new file mode 100644
index 0000000..57bc89a
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp
@@ -0,0 +1,598 @@
+#ifndef VIENNACL_LINALG_BICGSTAB_HPP_
+#define VIENNACL_LINALG_BICGSTAB_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file bicgstab.hpp
+ @brief The stabilized bi-conjugate gradient method is implemented here
+*/
+
+#include <vector>
+#include <cmath>
+#include <numeric>
+
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/linalg/prod.hpp"
+#include "viennacl/linalg/inner_prod.hpp"
+#include "viennacl/linalg/norm_2.hpp"
+#include "viennacl/traits/clear.hpp"
+#include "viennacl/traits/size.hpp"
+#include "viennacl/traits/context.hpp"
+#include "viennacl/meta/result_of.hpp"
+#include "viennacl/linalg/iterative_operations.hpp"
+
+namespace viennacl
+{
+namespace linalg
+{
+
+/** @brief A tag for the stabilized Bi-conjugate gradient solver. Used for supplying solver parameters and for dispatching the solve() function
+*/
+class bicgstab_tag
+{
+public:
+ /** @brief The constructor
+ *
+ * @param tol Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||)
+ * @param max_iters The maximum number of iterations
+ * @param max_iters_before_restart The maximum number of iterations before BiCGStab is reinitialized (to avoid accumulation of round-off errors)
+ */
+ bicgstab_tag(double tol = 1e-8, vcl_size_t max_iters = 400, vcl_size_t max_iters_before_restart = 200)
+ : tol_(tol), abs_tol_(0), iterations_(max_iters), iterations_before_restart_(max_iters_before_restart) {}
+
+ /** @brief Returns the relative tolerance */
+ double tolerance() const { return tol_; }
+
+ /** @brief Returns the absolute tolerance */
+ double abs_tolerance() const { return abs_tol_; }
+ /** @brief Sets the absolute tolerance */
+ void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; }
+
+ /** @brief Returns the maximum number of iterations */
+ vcl_size_t max_iterations() const { return iterations_; }
+ /** @brief Returns the maximum number of iterations before a restart*/
+ vcl_size_t max_iterations_before_restart() const { return iterations_before_restart_; }
+
+ /** @brief Return the number of solver iterations: */
+ vcl_size_t iters() const { return iters_taken_; }
+ void iters(vcl_size_t i) const { iters_taken_ = i; }
+
+ /** @brief Returns the estimated relative error at the end of the solver run */
+ double error() const { return last_error_; }
+ /** @brief Sets the estimated relative error at the end of the solver run */
+ void error(double e) const { last_error_ = e; }
+
+private:
+ double tol_;
+ double abs_tol_;
+ vcl_size_t iterations_;
+ vcl_size_t iterations_before_restart_;
+
+ //return values from solver
+ mutable vcl_size_t iters_taken_;
+ mutable double last_error_;
+};
+
+
+
+namespace detail
+{
+ /** @brief Implementation of a pipelined stabilized Bi-conjugate gradient solver */
+ template<typename MatrixT, typename NumericT>
+ viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A,
+ viennacl::vector_base<NumericT> const & rhs,
+ bicgstab_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ viennacl::vector<NumericT> result = viennacl::zero_vector<NumericT>(rhs.size(), viennacl::traits::context(rhs));
+
+ viennacl::vector<NumericT> residual = rhs;
+ viennacl::vector<NumericT> p = rhs;
+ viennacl::vector<NumericT> r0star = rhs;
+ viennacl::vector<NumericT> Ap = rhs;
+ viennacl::vector<NumericT> s = rhs;
+ viennacl::vector<NumericT> As = rhs;
+
+ // Layout of temporary buffer:
+ // chunk 0: <residual, r_0^*>
+ // chunk 1: <As, As>
+ // chunk 2: <As, s>
+ // chunk 3: <Ap, r_0^*>
+ // chunk 4: <As, r_0^*>
+ // chunk 5: <s, s>
+ vcl_size_t buffer_size_per_vector = 256;
+ vcl_size_t num_buffer_chunks = 6;
+ viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(num_buffer_chunks*buffer_size_per_vector, viennacl::traits::context(rhs)); // temporary buffer
+ std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size());
+
+ NumericT norm_rhs_host = viennacl::linalg::norm_2(residual);
+ NumericT beta;
+ NumericT alpha;
+ NumericT omega;
+ NumericT residual_norm = norm_rhs_host;
+ inner_prod_buffer[0] = norm_rhs_host * norm_rhs_host;
+
+ NumericT r_dot_r0 = 0;
+ NumericT As_dot_As = 0;
+ NumericT As_dot_s = 0;
+ NumericT Ap_dot_r0 = 0;
+ NumericT As_dot_r0 = 0;
+ NumericT s_dot_s = 0;
+
+ if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm is zero
+ return result;
+
+ for (vcl_size_t i = 0; i < tag.max_iterations(); ++i)
+ {
+ tag.iters(i+1);
+ // Ap = A*p_j
+ // Ap_dot_r0 = <Ap, r_0^*>
+ viennacl::linalg::pipelined_bicgstab_prod(A, p, Ap, r0star,
+ inner_prod_buffer, buffer_size_per_vector, 3*buffer_size_per_vector);
+
+ //////// first (weak) synchronization point ////
+
+ ///// method 1: compute alpha on host:
+ //
+ //// we only need the second chunk of the buffer for computing Ap_dot_r0:
+ //viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
+ //Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + buffer_size_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, ScalarType(0));
+
+ //alpha = residual_dot_r0 / Ap_dot_r0;
+
+ //// s_j = r_j - alpha_j q_j
+ //s = residual - alpha * Ap;
+
+ ///// method 2: compute alpha on device:
+ // s = r - alpha * Ap
+ // <s, s> first stage
+ // dump alpha at end of inner_prod_buffer
+ viennacl::linalg::pipelined_bicgstab_update_s(s, residual, Ap,
+ inner_prod_buffer, buffer_size_per_vector, 5*buffer_size_per_vector);
+
+ // As = A*s_j
+ // As_dot_As = <As, As>
+ // As_dot_s = <As, s>
+ // As_dot_r0 = <As, r_0^*>
+ viennacl::linalg::pipelined_bicgstab_prod(A, s, As, r0star,
+ inner_prod_buffer, buffer_size_per_vector, 4*buffer_size_per_vector);
+
+ //////// second (strong) synchronization point ////
+
+ viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
+
+ typedef typename std::vector<NumericT>::difference_type difference_type;
+
+ r_dot_r0 = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + difference_type( buffer_size_per_vector), NumericT(0));
+ As_dot_As = std::accumulate(host_inner_prod_buffer.begin() + difference_type( buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(2 * buffer_size_per_vector), NumericT(0));
+ As_dot_s = std::accumulate(host_inner_prod_buffer.begin() + difference_type(2 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(3 * buffer_size_per_vector), NumericT(0));
+ Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + difference_type(3 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(4 * buffer_size_per_vector), NumericT(0));
+ As_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + difference_type(4 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(5 * buffer_size_per_vector), NumericT(0));
+ s_dot_s = std::accumulate(host_inner_prod_buffer.begin() + difference_type(5 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(6 * buffer_size_per_vector), NumericT(0));
+
+ alpha = r_dot_r0 / Ap_dot_r0;
+ beta = - As_dot_r0 / Ap_dot_r0;
+ omega = As_dot_s / As_dot_As;
+
+ residual_norm = std::sqrt(s_dot_s - NumericT(2.0) * omega * As_dot_s + omega * omega * As_dot_As);
+ if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data))
+ break;
+ if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance() || residual_norm < tag.abs_tolerance())
+ break;
+
+ // x_{j+1} = x_j + alpha * p_j + omega * s_j
+ // r_{j+1} = s_j - omega * t_j
+ // p_{j+1} = r_{j+1} + beta * (p_j - omega * q_j)
+ // and compute first stage of r_dot_r0 = <r_{j+1}, r_o^*> for use in next iteration
+ viennacl::linalg::pipelined_bicgstab_vector_update(result, alpha, p, omega, s,
+ residual, As,
+ beta, Ap,
+ r0star, inner_prod_buffer, buffer_size_per_vector);
+ }
+
+ //store last error estimate:
+ tag.error(residual_norm / norm_rhs_host);
+
+ return result;
+ }
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ bicgstab_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ bicgstab_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ bicgstab_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ bicgstab_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ bicgstab_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+ /** @brief Implementation of the unpreconditioned stabilized Bi-conjugate gradient solver
+ *
+ * Following the description in "Iterative Methods for Sparse Linear Systems" by Y. Saad
+ *
+ * @param matrix The system matrix
+ * @param rhs The load vector
+ * @param tag Solver configuration tag
+ * @param monitor A callback routine which is called at each GMRES restart
+ * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data
+ * @return The result vector
+ */
+ template<typename MatrixT, typename VectorT>
+ VectorT solve_impl(MatrixT const & matrix,
+ VectorT const & rhs,
+ bicgstab_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
+ typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
+ VectorT result = rhs;
+ viennacl::traits::clear(result);
+
+ VectorT residual = rhs;
+ VectorT p = rhs;
+ VectorT r0star = rhs;
+ VectorT tmp0 = rhs;
+ VectorT tmp1 = rhs;
+ VectorT s = rhs;
+
+ CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual);
+ CPU_NumericType ip_rr0star = norm_rhs_host * norm_rhs_host;
+ CPU_NumericType beta;
+ CPU_NumericType alpha;
+ CPU_NumericType omega;
+ //ScalarType inner_prod_temp; //temporary variable for inner product computation
+ CPU_NumericType new_ip_rr0star = 0;
+ CPU_NumericType residual_norm = norm_rhs_host;
+
+ if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm is zero
+ return result;
+
+ bool restart_flag = true;
+ vcl_size_t last_restart = 0;
+ for (vcl_size_t i = 0; i < tag.max_iterations(); ++i)
+ {
+ if (restart_flag)
+ {
+ residual = viennacl::linalg::prod(matrix, result);
+ residual = rhs - residual;
+ p = residual;
+ r0star = residual;
+ ip_rr0star = viennacl::linalg::norm_2(residual);
+ ip_rr0star *= ip_rr0star;
+ restart_flag = false;
+ last_restart = i;
+ }
+
+ tag.iters(i+1);
+ tmp0 = viennacl::linalg::prod(matrix, p);
+ alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
+
+ s = residual - alpha*tmp0;
+
+ tmp1 = viennacl::linalg::prod(matrix, s);
+ CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1);
+ omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1);
+
+ result += alpha * p + omega * s;
+ residual = s - omega * tmp1;
+
+ new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);
+ residual_norm = viennacl::linalg::norm_2(residual);
+ if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data))
+ break;
+ if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance() || residual_norm < tag.abs_tolerance())
+ break;
+
+ beta = new_ip_rr0star / ip_rr0star * alpha/omega;
+ ip_rr0star = new_ip_rr0star;
+
+ if ( (ip_rr0star <= 0 && ip_rr0star >= 0)
+ || (omega <= 0 && omega >= 0)
+ || (i - last_restart > tag.max_iterations_before_restart())
+ ) //search direction degenerate. A restart might help
+ restart_flag = true;
+
+ // Execution of
+ // p = residual + beta * (p - omega*tmp0);
+ // without introducing temporary vectors:
+ p -= omega * tmp0;
+ p = residual + beta * p;
+ }
+
+ //store last error estimate:
+ tag.error(residual_norm / norm_rhs_host);
+
+ return result;
+ }
+
+
+ /** @brief Implementation of the preconditioned stabilized Bi-conjugate gradient solver
+ *
+ * Following the description of the unpreconditioned case in "Iterative Methods for Sparse Linear Systems" by Y. Saad
+ *
+ * @param matrix The system matrix
+ * @param rhs The load vector
+ * @param tag Solver configuration tag
+ * @param precond A preconditioner. Precondition operation is done via member function apply()
+ * @param monitor A callback routine which is called at each GMRES restart
+ * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data
+ * @return The result vector
+ */
+ template<typename MatrixT, typename VectorT, typename PreconditionerT>
+ VectorT solve_impl(MatrixT const & matrix,
+ VectorT const & rhs,
+ bicgstab_tag const & tag,
+ PreconditionerT const & precond,
+ bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
+ typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
+ VectorT result = rhs;
+ viennacl::traits::clear(result);
+
+ VectorT residual = rhs;
+ VectorT r0star = residual; //can be chosen arbitrarily in fact
+ VectorT tmp0 = rhs;
+ VectorT tmp1 = rhs;
+ VectorT s = rhs;
+
+ VectorT p = residual;
+
+ CPU_NumericType ip_rr0star = viennacl::linalg::norm_2(residual);
+ CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual);
+ CPU_NumericType beta;
+ CPU_NumericType alpha;
+ CPU_NumericType omega;
+ CPU_NumericType new_ip_rr0star = 0;
+ CPU_NumericType residual_norm = norm_rhs_host;
+
+ if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm is zero
+ return result;
+
+ bool restart_flag = true;
+ vcl_size_t last_restart = 0;
+ for (unsigned int i = 0; i < tag.max_iterations(); ++i)
+ {
+ if (restart_flag)
+ {
+ residual = viennacl::linalg::prod(matrix, result);
+ residual = rhs - residual;
+ precond.apply(residual);
+ p = residual;
+ r0star = residual;
+ ip_rr0star = viennacl::linalg::norm_2(residual);
+ ip_rr0star *= ip_rr0star;
+ restart_flag = false;
+ last_restart = i;
+ }
+
+ tag.iters(i+1);
+ tmp0 = viennacl::linalg::prod(matrix, p);
+ precond.apply(tmp0);
+ alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
+
+ s = residual - alpha*tmp0;
+
+ tmp1 = viennacl::linalg::prod(matrix, s);
+ precond.apply(tmp1);
+ CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1);
+ omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1);
+
+ result += alpha * p + omega * s;
+ residual = s - omega * tmp1;
+
+ residual_norm = viennacl::linalg::norm_2(residual);
+ if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data))
+ break;
+ if (residual_norm / norm_rhs_host < tag.tolerance() || residual_norm < tag.abs_tolerance())
+ break;
+
+ new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);
+
+ beta = new_ip_rr0star / ip_rr0star * alpha/omega;
+ ip_rr0star = new_ip_rr0star;
+
+ if ( (ip_rr0star >= 0 && ip_rr0star <= 0) || (omega >=0 && omega <= 0) || i - last_restart > tag.max_iterations_before_restart()) //search direction degenerate. A restart might help
+ restart_flag = true;
+
+ // Execution of
+ // p = residual + beta * (p - omega*tmp0);
+ // without introducing temporary vectors:
+ p -= omega * tmp0;
+ p = residual + beta * p;
+
+ //std::cout << "Rel. Residual in current step: " << std::sqrt(std::fabs(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host)) << std::endl;
+ }
+
+ //store last error estimate:
+ tag.error(residual_norm / norm_rhs_host);
+
+ return result;
+ }
+
+}
+
+
+
+template<typename MatrixT, typename VectorT, typename PreconditionerT>
+VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag, PreconditionerT const & precond)
+{
+ return detail::solve_impl(matrix, rhs, tag, precond);
+}
+
+
+/** @brief Convenience overload for calling the preconditioned BiCGStab solver using types from the C++ STL.
+ *
+ * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite element assembly.
+ * It is not the fastest option for setting up a system, but often it is fast enough - particularly for just trying things out.
+ */
+template<typename IndexT, typename NumericT, typename PreconditionerT>
+std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, bicgstab_tag const & tag, PreconditionerT const & precond)
+{
+ viennacl::compressed_matrix<NumericT> vcl_A;
+ viennacl::copy(A, vcl_A);
+
+ viennacl::vector<NumericT> vcl_rhs(rhs.size());
+ viennacl::copy(rhs, vcl_rhs);
+
+ viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond);
+
+ std::vector<NumericT> result(vcl_result.size());
+ viennacl::copy(vcl_result, result);
+ return result;
+}
+
+/** @brief Entry point for the unpreconditioned BiCGStab method.
+ *
+ * @param matrix The system matrix
+ * @param rhs Right hand side vector (load vector)
+ * @param tag A BiCGStab tag providing relative tolerances, etc.
+ */
+template<typename MatrixT, typename VectorT>
+VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag)
+{
+ return solve(matrix, rhs, tag, viennacl::linalg::no_precond());
+}
+
+
+
+template<typename VectorT>
+class bicgstab_solver
+{
+public:
+ typedef typename viennacl::result_of::cpu_value_type<VectorT>::type numeric_type;
+
+ bicgstab_solver(bicgstab_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {}
+
+ template<typename MatrixT, typename PreconditionerT>
+ VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const
+ {
+ if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account
+ {
+ VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_);
+ mod_rhs = b - mod_rhs;
+ VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_);
+ return init_guess_ + y;
+ }
+ return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_);
+ }
+
+
+ template<typename MatrixT>
+ VectorT operator()(MatrixT const & A, VectorT const & b) const
+ {
+ return operator()(A, b, viennacl::linalg::no_precond());
+ }
+
+ /** @brief Specifies an initial guess for the iterative solver.
+ *
+ * An iterative solver for Ax = b with initial guess x_0 is equivalent to an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y.
+ */
+ void set_initial_guess(VectorT const & x) { init_guess_ = x; }
+
+ /** @brief Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor.
+ *
+ * The monitor function is called with the current guess for the result as first argument and the current relative residual estimate as second argument.
+ * The third argument is a pointer to user-defined data, through which additional information can be passed.
+ * This pointer needs to be set with set_monitor_data. If not set, NULL is passed.
+ * If the montior function returns true, the solver terminates (either convergence or divergence).
+ */
+ void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
+ {
+ monitor_callback_ = monitor_fun;
+ user_data_ = user_data;
+ }
+
+ /** @brief Returns the solver tag containing basic configuration such as tolerances, etc. */
+ bicgstab_tag const & tag() const { return tag_; }
+
+private:
+ bicgstab_tag tag_;
+ VectorT init_guess_;
+ bool (*monitor_callback_)(VectorT const &, numeric_type, void *);
+ void *user_data_;
+};
+
+
+}
+}
+
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp
new file mode 100644
index 0000000..a2daf5e
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp
@@ -0,0 +1,179 @@
+#ifndef VIENNACL_LINALG_BISECT_HPP_
+#define VIENNACL_LINALG_BISECT_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/bisect.hpp
+* @brief Implementation of the algorithm for finding eigenvalues of a tridiagonal matrix.
+*
+* Contributed by Guenther Mader and Astrid Rupp.
+*/
+
+#include <vector>
+#include <cmath>
+#include <limits>
+#include <cstddef>
+#include "viennacl/meta/result_of.hpp"
+
+namespace viennacl
+{
+namespace linalg
+{
+
+namespace detail
+{
+ /**
+ * @brief overloaded function for copying vectors
+ */
+ template<typename NumericT, typename OtherVectorT>
+ void copy_vec_to_vec(viennacl::vector<NumericT> const & src, OtherVectorT & dest)
+ {
+ viennacl::copy(src, dest);
+ }
+
+ template<typename OtherVectorT, typename NumericT>
+ void copy_vec_to_vec(OtherVectorT const & src, viennacl::vector<NumericT> & dest)
+ {
+ viennacl::copy(src, dest);
+ }
+
+ template<typename VectorT1, typename VectorT2>
+ void copy_vec_to_vec(VectorT1 const & src, VectorT2 & dest)
+ {
+ for (vcl_size_t i=0; i<src.size(); ++i)
+ dest[i] = src[i];
+ }
+
+} //namespace detail
+
+/**
+* @brief Implementation of the bisect-algorithm for the calculation of the eigenvalues of a tridiagonal matrix. Experimental - interface might change.
+*
+* Refer to "Calculation of the Eigenvalues of a Symmetric Tridiagonal Matrix by the Method of Bisection" in the Handbook Series Linear Algebra, contributed by Barth, Martin, and Wilkinson.
+* http://www.maths.ed.ac.uk/~aar/papers/bamawi.pdf
+*
+* @param alphas Elements of the main diagonal
+* @param betas Elements of the secondary diagonal
+* @return Returns the eigenvalues of the tridiagonal matrix defined by alpha and beta
+*/
+template<typename VectorT>
+std::vector<
+ typename viennacl::result_of::cpu_value_type<typename VectorT::value_type>::type
+ >
+bisect(VectorT const & alphas, VectorT const & betas)
+{
+ typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
+ typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
+
+ vcl_size_t size = betas.size();
+ std::vector<CPU_NumericType> x_temp(size);
+
+
+ std::vector<CPU_NumericType> beta_bisect;
+ std::vector<CPU_NumericType> wu;
+
+ double rel_error = std::numeric_limits<CPU_NumericType>::epsilon();
+ beta_bisect.push_back(0);
+
+ for (vcl_size_t i = 1; i < size; i++)
+ beta_bisect.push_back(betas[i] * betas[i]);
+
+ double xmin = alphas[size - 1] - std::fabs(betas[size - 1]);
+ double xmax = alphas[size - 1] + std::fabs(betas[size - 1]);
+
+ for (vcl_size_t i = 0; i < size - 1; i++)
+ {
+ double h = std::fabs(betas[i]) + std::fabs(betas[i + 1]);
+ if (alphas[i] + h > xmax)
+ xmax = alphas[i] + h;
+ if (alphas[i] - h < xmin)
+ xmin = alphas[i] - h;
+ }
+
+
+ double eps1 = 1e-6;
+ /*double eps2 = (xmin + xmax > 0) ? (rel_error * xmax) : (-rel_error * xmin);
+ if (eps1 <= 0)
+ eps1 = eps2;
+ else
+ eps2 = 0.5 * eps1 + 7.0 * eps2; */
+
+ double x0 = xmax;
+
+ for (vcl_size_t i = 0; i < size; i++)
+ {
+ x_temp[i] = xmax;
+ wu.push_back(xmin);
+ }
+
+ for (long k = static_cast<long>(size) - 1; k >= 0; --k)
+ {
+ double xu = xmin;
+ for (long i = k; i >= 0; --i)
+ {
+ if (xu < wu[vcl_size_t(k-i)])
+ {
+ xu = wu[vcl_size_t(i)];
+ break;
+ }
+ }
+
+ if (x0 > x_temp[vcl_size_t(k)])
+ x0 = x_temp[vcl_size_t(k)];
+
+ double x1 = (xu + x0) / 2.0;
+ while (x0 - xu > 2.0 * rel_error * (std::fabs(xu) + std::fabs(x0)) + eps1)
+ {
+ vcl_size_t a = 0;
+ double q = 1;
+ for (vcl_size_t i = 0; i < size; i++)
+ {
+ if (q > 0 || q < 0)
+ q = alphas[i] - x1 - beta_bisect[i] / q;
+ else
+ q = alphas[i] - x1 - std::fabs(betas[i] / rel_error);
+
+ if (q < 0)
+ a++;
+ }
+
+ if (a <= static_cast<vcl_size_t>(k))
+ {
+ xu = x1;
+ if (a < 1)
+ wu[0] = x1;
+ else
+ {
+ wu[a] = x1;
+ if (x_temp[a - 1] > x1)
+ x_temp[a - 1] = x1;
+ }
+ }
+ else
+ x0 = x1;
+
+ x1 = (xu + x0) / 2.0;
+ }
+ x_temp[vcl_size_t(k)] = x1;
+ }
+ return x_temp;
+}
+
+} // end namespace linalg
+} // end namespace viennacl
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp
new file mode 100644
index 0000000..6918b14
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp
@@ -0,0 +1,173 @@
+#ifndef VIENNACL_LINALG_BISECT_GPU
+#define VIENNACL_LINALG_BISECT_GPU
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+
+/** @file viennacl/linalg/bisect_gpu.hpp
+ @brief Implementation of an bisection algorithm for eigenvalues
+
+ Implementation based on the sample provided with the CUDA 6.0 SDK, for which
+ the creation of derivative works is allowed by including the following statement:
+ "This software contains source code provided by NVIDIA Corporation."
+*/
+// includes, system
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+
+// includes, project
+#include "viennacl/linalg/detail/bisect/structs.hpp"
+#include "viennacl/linalg/detail/bisect/gerschgorin.hpp"
+#include "viennacl/linalg/detail/bisect/bisect_large.hpp"
+#include "viennacl/linalg/detail/bisect/bisect_small.hpp"
+
+
+namespace viennacl
+{
+namespace linalg
+{
+///////////////////////////////////////////////////////////////////////////
+//! @brief bisect The bisection algorithm computes the eigevalues
+//! of a symmetric tridiagonal matrix.
+//! @param diagonal diagonal elements of the matrix
+//! @param superdiagonal superdiagonal elements of the matrix
+//! @param eigenvalues Vectors with the eigenvalues in ascending order
+//! @return return false if any errors occured
+///
+//! overloaded function template: std::vectors as parameters
+template<typename NumericT>
+bool
+bisect(const std::vector<NumericT> & diagonal, const std::vector<NumericT> & superdiagonal, std::vector<NumericT> & eigenvalues)
+{
+ assert(diagonal.size() == superdiagonal.size() &&
+ diagonal.size() == eigenvalues.size() &&
+ bool("Input vectors do not have the same sizes!"));
+ bool bResult = false;
+ // flag if the matrix size is due to explicit user request
+ // desired precision of eigenvalues
+ NumericT precision = static_cast<NumericT>(0.00001);
+ const unsigned int mat_size = static_cast<unsigned int>(diagonal.size());
+
+ // set up input
+ viennacl::linalg::detail::InputData<NumericT> input(diagonal, superdiagonal, mat_size);
+
+ NumericT lg = FLT_MAX;
+ NumericT ug = -FLT_MAX;
+ // compute Gerschgorin interval
+ viennacl::linalg::detail::computeGerschgorin(input.std_a, input.std_b, mat_size, lg, ug);
+
+ // decide wheter the algorithm for small or for large matrices will be started
+ if (mat_size <= VIENNACL_BISECT_MAX_SMALL_MATRIX)
+ {
+ // initialize memory for result
+ viennacl::linalg::detail::ResultDataSmall<NumericT> result(mat_size);
+
+ // run the kernel
+ viennacl::linalg::detail::computeEigenvaluesSmallMatrix(input, result, mat_size, lg, ug, precision);
+
+ // get the result from the device and do some sanity checks,
+ viennacl::linalg::detail::processResultSmallMatrix(result, mat_size);
+ eigenvalues = result.std_eigenvalues;
+ bResult = true;
+ }
+
+ else
+ {
+ // initialize memory for result
+ viennacl::linalg::detail::ResultDataLarge<NumericT> result(mat_size);
+
+ // run the kernel
+ viennacl::linalg::detail::computeEigenvaluesLargeMatrix(input, result, mat_size, lg, ug, precision);
+
+ // get the result from the device and do some sanity checks
+ bResult = viennacl::linalg::detail::processResultDataLargeMatrix(result, mat_size);
+
+ eigenvalues = result.std_eigenvalues;
+ }
+ return bResult;
+}
+
+
+///////////////////////////////////////////////////////////////////////////
+//! @brief bisect The bisection algorithm computes the eigevalues
+//! of a symmetric tridiagonal matrix.
+//! @param diagonal diagonal elements of the matrix
+//! @param superdiagonal superdiagonal elements of the matrix
+//! @param eigenvalues Vectors with the eigenvalues in ascending order
+//! @return return false if any errors occured
+///
+//! overloaded function template: viennacl::vectors as parameters
+template<typename NumericT>
+bool
+bisect(const viennacl::vector<NumericT> & diagonal, const viennacl::vector<NumericT> & superdiagonal, viennacl::vector<NumericT> & eigenvalues)
+{
+ assert(diagonal.size() == superdiagonal.size() &&
+ diagonal.size() == eigenvalues.size() &&
+ bool("Input vectors do not have the same sizes!"));
+ bool bResult = false;
+ // flag if the matrix size is due to explicit user request
+ // desired precision of eigenvalues
+ NumericT precision = static_cast<NumericT>(0.00001);
+ const unsigned int mat_size = static_cast<unsigned int>(diagonal.size());
+
+ // set up input
+ viennacl::linalg::detail::InputData<NumericT> input(diagonal, superdiagonal, mat_size);
+
+ NumericT lg = FLT_MAX;
+ NumericT ug = -FLT_MAX;
+ // compute Gerschgorin interval
+ viennacl::linalg::detail::computeGerschgorin(input.std_a, input.std_b, mat_size, lg, ug);
+
+ // decide wheter the algorithm for small or for large matrices will be started
+ if (mat_size <= VIENNACL_BISECT_MAX_SMALL_MATRIX)
+ {
+ // initialize memory for result
+ viennacl::linalg::detail::ResultDataSmall<NumericT> result(mat_size);
+
+ // run the kernel
+ viennacl::linalg::detail::computeEigenvaluesSmallMatrix(input, result, mat_size, lg, ug, precision);
+
+ // get the result from the device and do some sanity checks,
+ viennacl::linalg::detail::processResultSmallMatrix(result, mat_size);
+ copy(result.std_eigenvalues, eigenvalues);
+ bResult = true;
+ }
+
+ else
+ {
+ // initialize memory for result
+ viennacl::linalg::detail::ResultDataLarge<NumericT> result(mat_size);
+
+ // run the kernel
+ viennacl::linalg::detail::computeEigenvaluesLargeMatrix(input, result, mat_size, lg, ug, precision);
+
+ // get the result from the device and do some sanity checks
+ bResult = viennacl::linalg::detail::processResultDataLargeMatrix(result, mat_size);
+
+ copy(result.std_eigenvalues, eigenvalues);
+ }
+ return bResult;
+}
+} // namespace linalg
+} // namespace viennacl
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp
new file mode 100644
index 0000000..93aae81
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp
@@ -0,0 +1,440 @@
+#ifndef VIENNACL_LINALG_CG_HPP_
+#define VIENNACL_LINALG_CG_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/cg.hpp
+ @brief The conjugate gradient method is implemented here
+*/
+
+#include <vector>
+#include <map>
+#include <cmath>
+#include <numeric>
+
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/linalg/ilu.hpp"
+#include "viennacl/linalg/prod.hpp"
+#include "viennacl/linalg/inner_prod.hpp"
+#include "viennacl/linalg/norm_2.hpp"
+#include "viennacl/traits/clear.hpp"
+#include "viennacl/traits/size.hpp"
+#include "viennacl/meta/result_of.hpp"
+#include "viennacl/linalg/iterative_operations.hpp"
+
+namespace viennacl
+{
+namespace linalg
+{
+
+/** @brief A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve() function
+*/
+class cg_tag
+{
+public:
+ /** @brief The constructor
+ *
+ * @param tol Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||)
+ * @param max_iterations The maximum number of iterations
+ */
+ cg_tag(double tol = 1e-8, unsigned int max_iterations = 300) : tol_(tol), abs_tol_(0), iterations_(max_iterations) {}
+
+ /** @brief Returns the relative tolerance */
+ double tolerance() const { return tol_; }
+
+ /** @brief Returns the absolute tolerance */
+ double abs_tolerance() const { return abs_tol_; }
+ /** @brief Sets the absolute tolerance */
+ void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; }
+
+ /** @brief Returns the maximum number of iterations */
+ unsigned int max_iterations() const { return iterations_; }
+
+ /** @brief Return the number of solver iterations: */
+ unsigned int iters() const { return iters_taken_; }
+ void iters(unsigned int i) const { iters_taken_ = i; }
+
+ /** @brief Returns the estimated relative error at the end of the solver run */
+ double error() const { return last_error_; }
+ /** @brief Sets the estimated relative error at the end of the solver run */
+ void error(double e) const { last_error_ = e; }
+
+
+private:
+ double tol_;
+ double abs_tol_;
+ unsigned int iterations_;
+
+ //return values from solver
+ mutable unsigned int iters_taken_;
+ mutable double last_error_;
+};
+
+namespace detail
+{
+
+ /** @brief handles the no_precond case at minimal overhead */
+ template<typename VectorT, typename PreconditionerT>
+ class z_handler{
+ public:
+ z_handler(VectorT & residual) : z_(residual){ }
+ VectorT & get() { return z_; }
+ private:
+ VectorT z_;
+ };
+
+ template<typename VectorT>
+ class z_handler<VectorT, viennacl::linalg::no_precond>{
+ public:
+ z_handler(VectorT & residual) : presidual_(&residual){ }
+ VectorT & get() { return *presidual_; }
+ private:
+ VectorT * presidual_;
+ };
+
+}
+
+namespace detail
+{
+
+ /** @brief Implementation of a pipelined conjugate gradient algorithm (no preconditioner), specialized for ViennaCL types.
+ *
+ * Pipelined version from A. T. Chronopoulos and C. W. Gear, J. Comput. Appl. Math. 25(2), 153\u2013168 (1989)
+ *
+ * @param A The system matrix
+ * @param rhs The load vector
+ * @param tag Solver configuration tag
+ * @param monitor A callback routine which is called at each GMRES restart
+ * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data
+ * @return The result vector
+ */
+ //template<typename MatrixType, typename ScalarType>
+ template<typename MatrixT, typename NumericT>
+ viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A,
+ viennacl::vector<NumericT> const & rhs,
+ cg_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ typedef typename viennacl::vector<NumericT>::difference_type difference_type;
+
+ viennacl::vector<NumericT> result(rhs);
+ viennacl::traits::clear(result);
+
+ viennacl::vector<NumericT> residual(rhs);
+ viennacl::vector<NumericT> p(rhs);
+ viennacl::vector<NumericT> Ap = viennacl::linalg::prod(A, p);
+ viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(3*256, viennacl::traits::context(rhs)); // temporary buffer
+ std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size());
+ vcl_size_t buffer_size_per_vector = inner_prod_buffer.size() / 3;
+ difference_type buffer_offset_per_vector = static_cast<difference_type>(buffer_size_per_vector);
+
+ NumericT norm_rhs_squared = viennacl::linalg::norm_2(residual); norm_rhs_squared *= norm_rhs_squared;
+
+ if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //check for early convergence of A*x = 0
+ return result;
+
+ NumericT inner_prod_rr = norm_rhs_squared;
+ NumericT alpha = inner_prod_rr / viennacl::linalg::inner_prod(p, Ap);
+ NumericT beta = viennacl::linalg::norm_2(Ap); beta = (alpha * alpha * beta * beta - inner_prod_rr) / inner_prod_rr;
+ NumericT inner_prod_ApAp = 0;
+ NumericT inner_prod_pAp = 0;
+
+ for (unsigned int i = 0; i < tag.max_iterations(); ++i)
+ {
+ tag.iters(i+1);
+
+ viennacl::linalg::pipelined_cg_vector_update(result, alpha, p, residual, Ap, beta, inner_prod_buffer);
+ viennacl::linalg::pipelined_cg_prod(A, p, Ap, inner_prod_buffer);
+
+ // bring back the partial results to the host:
+ viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
+
+ inner_prod_rr = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + buffer_offset_per_vector, NumericT(0));
+ inner_prod_ApAp = std::accumulate(host_inner_prod_buffer.begin() + buffer_offset_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, NumericT(0));
+ inner_prod_pAp = std::accumulate(host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, host_inner_prod_buffer.begin() + 3 * buffer_offset_per_vector, NumericT(0));
+
+ if (monitor && monitor(result, std::sqrt(std::fabs(inner_prod_rr / norm_rhs_squared)), monitor_data))
+ break;
+ if (std::fabs(inner_prod_rr / norm_rhs_squared) < tag.tolerance() * tag.tolerance() || std::fabs(inner_prod_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here
+ break;
+
+ alpha = inner_prod_rr / inner_prod_pAp;
+ beta = (alpha*alpha*inner_prod_ApAp - inner_prod_rr) / inner_prod_rr;
+ }
+
+ //store last error estimate:
+ tag.error(std::sqrt(std::fabs(inner_prod_rr) / norm_rhs_squared));
+
+ return result;
+ }
+
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ cg_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ cg_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ cg_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ cg_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+ /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
+ template<typename NumericT>
+ viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & A,
+ viennacl::vector<NumericT> const & rhs,
+ cg_tag const & tag,
+ viennacl::linalg::no_precond,
+ bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
+ }
+
+
+ template<typename MatrixT, typename VectorT, typename PreconditionerT>
+ VectorT solve_impl(MatrixT const & matrix,
+ VectorT const & rhs,
+ cg_tag const & tag,
+ PreconditionerT const & precond,
+ bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
+ void *monitor_data = NULL)
+ {
+ typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
+ typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
+
+ VectorT result = rhs;
+ viennacl::traits::clear(result);
+
+ VectorT residual = rhs;
+ VectorT tmp = rhs;
+ detail::z_handler<VectorT, PreconditionerT> zhandler(residual);
+ VectorT & z = zhandler.get();
+
+ precond.apply(z);
+ VectorT p = z;
+
+ CPU_NumericType ip_rr = viennacl::linalg::inner_prod(residual, z);
+ CPU_NumericType alpha;
+ CPU_NumericType new_ip_rr = 0;
+ CPU_NumericType beta;
+ CPU_NumericType norm_rhs_squared = ip_rr;
+ CPU_NumericType new_ipp_rr_over_norm_rhs;
+
+ if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //solution is zero if RHS norm (squared) is zero
+ return result;
+
+ for (unsigned int i = 0; i < tag.max_iterations(); ++i)
+ {
+ tag.iters(i+1);
+ tmp = viennacl::linalg::prod(matrix, p);
+
+ alpha = ip_rr / viennacl::linalg::inner_prod(tmp, p);
+
+ result += alpha * p;
+ residual -= alpha * tmp;
+ z = residual;
+ precond.apply(z);
+
+ if (static_cast<VectorT*>(&residual)==static_cast<VectorT*>(&z))
+ new_ip_rr = std::pow(viennacl::linalg::norm_2(residual),2);
+ else
+ new_ip_rr = viennacl::linalg::inner_prod(residual, z);
+
+ new_ipp_rr_over_norm_rhs = new_ip_rr / norm_rhs_squared;
+ if (monitor && monitor(result, std::sqrt(std::fabs(new_ipp_rr_over_norm_rhs)), monitor_data))
+ break;
+ if (std::fabs(new_ipp_rr_over_norm_rhs) < tag.tolerance() * tag.tolerance() || std::fabs(new_ip_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here
+ break;
+
+ beta = new_ip_rr / ip_rr;
+ ip_rr = new_ip_rr;
+
+ p = z + beta*p;
+ }
+
+ //store last error estimate:
+ tag.error(std::sqrt(std::fabs(new_ip_rr / norm_rhs_squared)));
+
+ return result;
+ }
+
+}
+
+
+
+/** @brief Implementation of the preconditioned conjugate gradient solver, generic implementation for non-ViennaCL types.
+*
+* Following Algorithm 9.1 in "Iterative Methods for Sparse Linear Systems" by Y. Saad
+*
+* @param matrix The system matrix
+* @param rhs The load vector
+* @param tag Solver configuration tag
+* @param precond A preconditioner. Precondition operation is done via member function apply()
+* @return The result vector
+*/
+template<typename MatrixT, typename VectorT, typename PreconditionerT>
+VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag, PreconditionerT const & precond)
+{
+ return detail::solve_impl(matrix, rhs, tag, precond);
+}
+
+/** @brief Convenience overload for calling the CG solver using types from the C++ STL.
+ *
+ * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite element assembly.
+ * It is not the fastest option for setting up a system, but often it is fast enough - particularly for just trying things out.
+ */
+template<typename IndexT, typename NumericT, typename PreconditionerT>
+std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, cg_tag const & tag, PreconditionerT const & precond)
+{
+ viennacl::compressed_matrix<NumericT> vcl_A;
+ viennacl::copy(A, vcl_A);
+
+ viennacl::vector<NumericT> vcl_rhs(rhs.size());
+ viennacl::copy(rhs, vcl_rhs);
+
+ viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond);
+
+ std::vector<NumericT> result(vcl_result.size());
+ viennacl::copy(vcl_result, result);
+ return result;
+}
+
+/** @brief Entry point for the unpreconditioned CG method.
+ *
+ * @param matrix The system matrix
+ * @param rhs Right hand side vector (load vector)
+ * @param tag A BiCGStab tag providing relative tolerances, etc.
+ */
+template<typename MatrixT, typename VectorT>
+VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag)
+{
+ return solve(matrix, rhs, tag, viennacl::linalg::no_precond());
+}
+
+
+
+template<typename VectorT>
+class cg_solver
+{
+public:
+ typedef typename viennacl::result_of::cpu_value_type<VectorT>::type numeric_type;
+
+ cg_solver(cg_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {}
+
+ template<typename MatrixT, typename PreconditionerT>
+ VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const
+ {
+ if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account
+ {
+ VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_);
+ mod_rhs = b - mod_rhs;
+ VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_);
+ return init_guess_ + y;
+ }
+ return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_);
+ }
+
+
+ template<typename MatrixT>
+ VectorT operator()(MatrixT const & A, VectorT const & b) const
+ {
+ return operator()(A, b, viennacl::linalg::no_precond());
+ }
+
+ /** @brief Specifies an initial guess for the iterative solver.
+ *
+ * An iterative solver for Ax = b with initial guess x_0 is equivalent to an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y.
+ */
+ void set_initial_guess(VectorT const & x) { init_guess_ = x; }
+
+ /** @brief Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor.
+ *
+ * The monitor function is called with the current guess for the result as first argument and the current relative residual estimate as second argument.
+ * The third argument is a pointer to user-defined data, through which additional information can be passed.
+ * This pointer needs to be set with set_monitor_data. If not set, NULL is passed.
+ * If the montior function returns true, the solver terminates (either convergence or divergence).
+ */
+ void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
+ {
+ monitor_callback_ = monitor_fun;
+ user_data_ = user_data;
+ }
+
+ /** @brief Returns the solver tag containing basic configuration such as tolerances, etc. */
+ cg_tag const & tag() const { return tag_; }
+
+private:
+ cg_tag tag_;
+ VectorT init_guess_;
+ bool (*monitor_callback_)(VectorT const &, numeric_type, void *);
+ void *user_data_;
+};
+
+
+}
+}
+
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp
new file mode 100644
index 0000000..5325b7b
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp
@@ -0,0 +1,75 @@
+#ifndef VIENNACL_LINALG_CIRCULANT_MATRIX_OPERATIONS_HPP_
+#define VIENNACL_LINALG_CIRCULANT_MATRIX_OPERATIONS_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/circulant_matrix_operations.hpp
+ @brief Implementations of operations using circulant_matrix. Experimental.
+*/
+
+#include "viennacl/forwards.h"
+#include "viennacl/ocl/backend.hpp"
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/fft.hpp"
+//#include "viennacl/linalg/kernels/coordinate_matrix_kernels.h"
+
+namespace viennacl
+{
+namespace linalg
+{
+
+// A * x
+
+/** @brief Carries out matrix-vector multiplication with a circulant_matrix
+*
+* Implementation of the convenience expression result = prod(mat, vec);
+*
+* @param mat The matrix
+* @param vec The vector
+* @param result The result vector
+*/
+template<typename NumericT, unsigned int AlignmentV>
+void prod_impl(viennacl::circulant_matrix<NumericT, AlignmentV> const & mat,
+ viennacl::vector_base<NumericT> const & vec,
+ viennacl::vector_base<NumericT> & result)
+{
+ assert(mat.size1() == result.size() && bool("Dimension mismatch"));
+ assert(mat.size2() == vec.size() && bool("Dimension mismatch"));
+ //result.clear();
+
+ //std::cout << "prod(circulant_matrix" << ALIGNMENT << ", vector) called with internal_nnz=" << mat.internal_nnz() << std::endl;
+
+ viennacl::vector<NumericT> circ(mat.elements().size() * 2);
+ viennacl::linalg::real_to_complex(mat.elements(), circ, mat.elements().size());
+
+ viennacl::vector<NumericT> tmp(vec.size() * 2);
+ viennacl::vector<NumericT> tmp2(vec.size() * 2);
+
+ viennacl::linalg::real_to_complex(vec, tmp, vec.size());
+ viennacl::linalg::convolve(circ, tmp, tmp2);
+ viennacl::linalg::complex_to_real(tmp2, result, vec.size());
+
+}
+
+} //namespace linalg
+} //namespace viennacl
+
+
+#endif