You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2016/06/10 16:52:32 UTC
[27/51] [partial] mahout git commit: Revert "(nojira) add
native-viennaCL module to codebase. closes apache/mahout#241"
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp
deleted file mode 100644
index 8361308..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp
+++ /dev/null
@@ -1,208 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_AMG_AMG_BASE_HPP_
-#define VIENNACL_LINALG_DETAIL_AMG_AMG_BASE_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file amg_base.hpp
- @brief Helper classes and functions for the AMG preconditioner. Experimental.
-
- AMG code contributed by Markus Wagner
-*/
-
-#include <cmath>
-#include <set>
-#include <list>
-#include <stdexcept>
-#include <algorithm>
-
-#include <map>
-#ifdef VIENNACL_WITH_OPENMP
-#include <omp.h>
-#endif
-
-#include "viennacl/context.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-
-/** @brief Enumeration of coarsening methods for algebraic multigrid. */
-enum amg_coarsening_method
-{
- AMG_COARSENING_METHOD_ONEPASS = 1,
- AMG_COARSENING_METHOD_AGGREGATION,
- AMG_COARSENING_METHOD_MIS2_AGGREGATION
-};
-
-/** @brief Enumeration of interpolation methods for algebraic multigrid. */
-enum amg_interpolation_method
-{
- AMG_INTERPOLATION_METHOD_DIRECT = 1,
- AMG_INTERPOLATION_METHOD_AGGREGATION,
- AMG_INTERPOLATION_METHOD_SMOOTHED_AGGREGATION
-};
-
-
-/** @brief A tag for algebraic multigrid (AMG). Used to transport information from the user to the implementation.
-*/
-class amg_tag
-{
-public:
- /** @brief The constructor, setting default values for the various parameters.
- *
- * Default coarsening routine: Aggreggation based on maximum independent sets of distance (MIS-2)
- * Default interpolation routine: Smoothed aggregation
- * Default threshold for strong connections: 0.1 (customizations are recommeded!)
- * Default weight for Jacobi smoother: 1.0
- * Default number of pre-smooth operations: 2
- * Default number of post-smooth operations: 2
- * Default number of coarse levels: 0 (this indicates that as many coarse levels as needed are constructed until the cutoff is reached)
- * Default coarse grid size for direct solver (coarsening cutoff): 50
- */
- amg_tag()
- : coarsening_method_(AMG_COARSENING_METHOD_MIS2_AGGREGATION), interpolation_method_(AMG_INTERPOLATION_METHOD_AGGREGATION),
- strong_connection_threshold_(0.1), jacobi_weight_(1.0),
- presmooth_steps_(2), postsmooth_steps_(2),
- coarse_levels_(0), coarse_cutoff_(50) {}
-
- // Getter-/Setter-Functions
- /** @brief Sets the strategy used for constructing coarse grids */
- void set_coarsening_method(amg_coarsening_method s) { coarsening_method_ = s; }
- /** @brief Returns the current coarsening strategy */
- amg_coarsening_method get_coarsening_method() const { return coarsening_method_; }
-
- /** @brief Sets the interpolation method to the provided method */
- void set_interpolation_method(amg_interpolation_method interpol) { interpolation_method_ = interpol; }
- /** @brief Returns the current interpolation method */
- amg_interpolation_method get_interpolation_method() const { return interpolation_method_; }
-
- /** @brief Sets the strong connection threshold. Customizations by the user essential for best results!
- *
- * With classical interpolation, a connection is considered strong if |a_ij| >= threshold * max_k(|a_ik|)
- * Strength of connection currently ignored for aggregation-based coarsening (to be added in the future).
- */
- void set_strong_connection_threshold(double threshold) { if (threshold > 0) strong_connection_threshold_ = threshold; }
- /** @brief Returns the strong connection threshold parameter.
- *
- * @see set_strong_connection_threshold() for an explanation of the threshold parameter
- */
- double get_strong_connection_threshold() const { return strong_connection_threshold_; }
-
- /** @brief Sets the weight (damping) for the Jacobi smoother.
- *
- * The optimal value depends on the problem at hand. Values of 0.67 or 1.0 are usually a good starting point for further experiments.
- */
- void set_jacobi_weight(double w) { if (w > 0) jacobi_weight_ = w; }
- /** @brief Returns the Jacobi smoother weight (damping). */
- double get_jacobi_weight() const { return jacobi_weight_; }
-
- /** @brief Sets the number of smoother applications on the fine level before restriction to the coarser level. */
- void set_presmooth_steps(vcl_size_t steps) { presmooth_steps_ = steps; }
- /** @brief Returns the number of smoother applications on the fine level before restriction to the coarser level. */
- vcl_size_t get_presmooth_steps() const { return presmooth_steps_; }
-
- /** @brief Sets the number of smoother applications on the coarse level before interpolation to the finer level. */
- void set_postsmooth_steps(vcl_size_t steps) { postsmooth_steps_ = steps; }
- /** @brief Returns the number of smoother applications on the coarse level before interpolation to the finer level. */
- vcl_size_t get_postsmooth_steps() const { return postsmooth_steps_; }
-
- /** @brief Sets the number of coarse levels. If set to zero, then coarse levels are constructed until the cutoff size is reached. */
- void set_coarse_levels(vcl_size_t levels) { coarse_levels_ = levels; }
- /** @brief Returns the number of coarse levels. If zero, then coarse levels are constructed until the cutoff size is reached. */
- vcl_size_t get_coarse_levels() const { return coarse_levels_; }
-
- /** @brief Sets the coarse grid size for which the recursive multigrid scheme is stopped and a direct solver is used. */
- void set_coarsening_cutoff(vcl_size_t size) { coarse_cutoff_ = size; }
- /** @brief Returns the coarse grid size for which the recursive multigrid scheme is stopped and a direct solver is used. */
- vcl_size_t get_coarsening_cutoff() const { return coarse_cutoff_; }
-
- /** @brief Sets the ViennaCL context for the setup stage. Set this to a host context if you want to run the setup on the host.
- *
- * Set the ViennaCL context for the solver application via set_target_context().
- * Target and setup context can be different.
- */
- void set_setup_context(viennacl::context ctx) { setup_ctx_ = ctx; }
- /** @brief Returns the ViennaCL context for the preconditioenr setup. */
- viennacl::context const & get_setup_context() const { return setup_ctx_; }
-
- /** @brief Sets the ViennaCL context for the solver cycle stage (i.e. preconditioner applications).
- *
- * Since the cycle stage easily benefits from accelerators, you usually want to set this to a CUDA or OpenCL-enabled context.
- */
- void set_target_context(viennacl::context ctx) { target_ctx_ = ctx; }
- /** @brief Returns the ViennaCL context for the solver cycle stage (i.e. preconditioner applications). */
- viennacl::context const & get_target_context() const { return target_ctx_; }
-
-private:
- amg_coarsening_method coarsening_method_;
- amg_interpolation_method interpolation_method_;
- double strong_connection_threshold_, jacobi_weight_;
- vcl_size_t presmooth_steps_, postsmooth_steps_, coarse_levels_, coarse_cutoff_;
- viennacl::context setup_ctx_, target_ctx_;
-};
-
-
-namespace detail
-{
-namespace amg
-{
-
-
- struct amg_level_context
- {
- void resize(vcl_size_t num_points, vcl_size_t max_nnz)
- {
- influence_jumper_.resize(num_points + 1, false);
- influence_ids_.resize(max_nnz, false);
- influence_values_.resize(num_points, false);
- point_types_.resize(num_points, false);
- coarse_id_.resize(num_points, false);
- }
-
- void switch_context(viennacl::context ctx)
- {
- influence_jumper_.switch_memory_context(ctx);
- influence_ids_.switch_memory_context(ctx);
- influence_values_.switch_memory_context(ctx);
- point_types_.switch_memory_context(ctx);
- coarse_id_.switch_memory_context(ctx);
- }
-
- enum
- {
- POINT_TYPE_UNDECIDED = 0,
- POINT_TYPE_COARSE,
- POINT_TYPE_FINE
- } amg_point_types;
-
- viennacl::vector<unsigned int> influence_jumper_; // similar to row_buffer for CSR matrices
- viennacl::vector<unsigned int> influence_ids_; // IDs of influencing points
- viennacl::vector<unsigned int> influence_values_; // Influence measure for each point
- viennacl::vector<unsigned int> point_types_; // 0: undecided, 1: coarse point, 2: fine point. Using char here because type for enum might be a larger type
- viennacl::vector<unsigned int> coarse_id_; // coarse ID used on the next level. Only valid for coarse points. Fine points may (ab)use their entry for something else.
- unsigned int num_coarse_;
- };
-
-
-} //namespace amg
-}
-}
-}
-
-#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
deleted file mode 100644
index 8308f77..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
+++ /dev/null
@@ -1,191 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_KERNEL_CALLS_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_KERNEL_CALLS_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
- @brief Kernel calls for the bisection algorithm
-
- Implementation based on the sample provided with the CUDA 6.0 SDK, for which
- the creation of derivative works is allowed by including the following statement:
- "This software contains source code provided by NVIDIA Corporation."
-*/
-
-
-#include "viennacl/forwards.h"
-#include "viennacl/scalar.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/vector_proxy.hpp"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/meta/enable_if.hpp"
-#include "viennacl/meta/predicate.hpp"
-#include "viennacl/meta/result_of.hpp"
-#include "viennacl/traits/size.hpp"
-#include "viennacl/traits/start.hpp"
-#include "viennacl/traits/handle.hpp"
-#include "viennacl/traits/stride.hpp"
-
-#include "viennacl/linalg/detail/bisect/structs.hpp"
-#ifdef VIENNACL_WITH_OPENCL
- #include "viennacl/linalg/opencl/bisect_kernel_calls.hpp"
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
- #include "viennacl/linalg/cuda/bisect_kernel_calls.hpp"
-#endif
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
- template<typename NumericT>
- void bisectSmall(const InputData<NumericT> &input, ResultDataSmall<NumericT> &result,
- const unsigned int mat_size,
- const NumericT lg, const NumericT ug,
- const NumericT precision)
- {
- switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
- {
-#ifdef VIENNACL_WITH_OPENCL
- case viennacl::OPENCL_MEMORY:
- viennacl::linalg::opencl::bisectSmall(input, result,
- mat_size,
- lg,ug,
- precision);
- break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
- case viennacl::CUDA_MEMORY:
- viennacl::linalg::cuda::bisectSmall(input, result,
- mat_size,
- lg,ug,
- precision);
- break;
-#endif
- case viennacl::MEMORY_NOT_INITIALIZED:
- throw memory_exception("not initialised!");
- default:
- throw memory_exception("not implemented");
- }
- }
-
-
-
-
- template<typename NumericT>
- void bisectLarge(const InputData<NumericT> &input, ResultDataLarge<NumericT> &result,
- const unsigned int mat_size,
- const NumericT lg, const NumericT ug,
- const NumericT precision)
- {
- switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
- {
-#ifdef VIENNACL_WITH_OPENCL
- case viennacl::OPENCL_MEMORY:
- viennacl::linalg::opencl::bisectLarge(input, result,
- mat_size,
- lg,ug,
- precision);
- break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
- case viennacl::CUDA_MEMORY:
- viennacl::linalg::cuda::bisectLarge(input, result,
- mat_size,
- lg,ug,
- precision);
- break;
-#endif
- case viennacl::MEMORY_NOT_INITIALIZED:
- throw memory_exception("not initialised!");
- default:
- throw memory_exception("not implemented");
- }
- }
-
-
-
-
-
- template<typename NumericT>
- void bisectLarge_OneIntervals(const InputData<NumericT> &input, ResultDataLarge<NumericT> &result,
- const unsigned int mat_size,
- const NumericT precision)
- {
- switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
- {
-#ifdef VIENNACL_WITH_OPENCL
- case viennacl::OPENCL_MEMORY:
- viennacl::linalg::opencl::bisectLargeOneIntervals(input, result,
- mat_size,
- precision);
- break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
- case viennacl::CUDA_MEMORY:
- viennacl::linalg::cuda::bisectLarge_OneIntervals(input, result,
- mat_size,
- precision);
-
- break;
-#endif
- case viennacl::MEMORY_NOT_INITIALIZED:
- throw memory_exception("not initialised!");
- default:
- throw memory_exception("not implemented");
- }
- }
-
-
-
-
- template<typename NumericT>
- void bisectLarge_MultIntervals(const InputData<NumericT> &input, ResultDataLarge<NumericT> &result,
- const unsigned int mat_size,
- const NumericT precision)
- {
- switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
- {
-#ifdef VIENNACL_WITH_OPENCL
- case viennacl::OPENCL_MEMORY:
- viennacl::linalg::opencl::bisectLargeMultIntervals(input, result,
- mat_size,
- precision);
- break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
- case viennacl::CUDA_MEMORY:
- viennacl::linalg::cuda::bisectLarge_MultIntervals(input, result,
- mat_size,
- precision);
- break;
-#endif
- case viennacl::MEMORY_NOT_INITIALIZED:
- throw memory_exception("not initialised!");
- default:
- throw memory_exception("not implemented");
- }
- }
-} // namespace detail
-} // namespace linalg
-} //namespace viennacl
-
-
-#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp
deleted file mode 100755
index 337858f..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp
+++ /dev/null
@@ -1,142 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_BISECT_LARGE_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_BISECT_LARGE_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-
-/** @file viennacl/linalg/detail//bisect/bisect_large.hpp
- @brief Computation of eigenvalues of a large symmetric, tridiagonal matrix
-
- Implementation based on the sample provided with the CUDA 6.0 SDK, for which
- the creation of derivative works is allowed by including the following statement:
- "This software contains source code provided by NVIDIA Corporation."
-*/
-
-
-// includes, system
-#include <iostream>
-#include <iomanip>
-#include <stdlib.h>
-#include <stdio.h>
-
-// includes, project
-#include "viennacl/linalg/detail/bisect/config.hpp"
-#include "viennacl/linalg/detail/bisect/structs.hpp"
-#include "viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-
-////////////////////////////////////////////////////////////////////////////////
-//! Run the kernels to compute the eigenvalues for large matrices
-//! @param input handles to input data
-//! @param result handles to result data
-//! @param mat_size matrix size
-//! @param precision desired precision of eigenvalues
-//! @param lg lower limit of Gerschgorin interval
-//! @param ug upper limit of Gerschgorin interval
-////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-void
-computeEigenvaluesLargeMatrix(InputData<NumericT> &input, ResultDataLarge<NumericT> &result,
- const unsigned int mat_size,
- const NumericT lg, const NumericT ug, const NumericT precision)
-{
-
-
- // First kernel call: decide on which intervals bisect_Large_OneIntervals/
- // bisect_Large_MultIntervals is executed
- viennacl::linalg::detail::bisectLarge(input, result, mat_size, lg, ug, precision);
-
- // compute eigenvalues for intervals that contained only one eigenvalue
- // after the first processing step
- viennacl::linalg::detail::bisectLarge_OneIntervals(input, result, mat_size, precision);
-
- // process intervals that contained more than one eigenvalue after
- // the first processing step
- viennacl::linalg::detail::bisectLarge_MultIntervals(input, result, mat_size, precision);
-
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Process the result, that is obtain result from device and do simple sanity
-//! checking
-//! @param result handles to result data
-//! @param mat_size matrix size
-////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-bool
-processResultDataLargeMatrix(ResultDataLarge<NumericT> &result,
- const unsigned int mat_size)
-{
- bool bCompareResult = true;
- // copy data from intervals that contained more than one eigenvalue after
- // the first processing step
- std::vector<NumericT> lambda_mult(mat_size);
- viennacl::copy(result.g_lambda_mult, lambda_mult);
-
- std::vector<unsigned int> pos_mult(mat_size);
- viennacl::copy(result.g_pos_mult, pos_mult);
-
- std::vector<unsigned int> blocks_mult_sum(mat_size);
- viennacl::copy(result.g_blocks_mult_sum, blocks_mult_sum);
-
- unsigned int num_one_intervals = result.g_num_one;
- unsigned int sum_blocks_mult = mat_size - num_one_intervals;
-
-
- // copy data for intervals that contained one eigenvalue after the first
- // processing step
- std::vector<NumericT> left_one(mat_size);
- std::vector<NumericT> right_one(mat_size);
- std::vector<unsigned int> pos_one(mat_size);
-
- viennacl::copy(result.g_left_one, left_one);
- viennacl::copy(result.g_right_one, right_one);
- viennacl::copy(result.g_pos_one, pos_one);
-
-
- // singleton intervals generated in the second step
- for (unsigned int i = 0; i < sum_blocks_mult; ++i)
- {
- if (pos_mult[i] != 0)
- result.std_eigenvalues[pos_mult[i] - 1] = lambda_mult[i];
-
- else
- {
- throw memory_exception("Invalid array index! Are there more than 256 equal eigenvalues?");
- }
- }
-
- // singleton intervals generated in the first step
- unsigned int index = 0;
-
- for (unsigned int i = 0; i < num_one_intervals; ++i, ++index)
- {
- result.std_eigenvalues[pos_one[i] - 1] = left_one[i];
- }
- return bCompareResult;
-}
-} // namespace detail
-} // namespace linalg
-} // namespace viennacl
-#endif //VIENNACL_LINALG_DETAIL_BISECT_LARGE_HPP_
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp
deleted file mode 100755
index 144640b..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp
+++ /dev/null
@@ -1,96 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_SMALL_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_SMALL_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-
-/** @file viennacl/linalg/detail//bisect/bisect_small.hpp
- @brief Computation of eigenvalues of a small symmetric, tridiagonal matrix
-
- Implementation based on the sample provided with the CUDA 6.0 SDK, for which
- the creation of derivative works is allowed by including the following statement:
- "This software contains source code provided by NVIDIA Corporation."
-*/
-
-// includes, system
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-
-// includes, project
-
-#include "viennacl/linalg/detail/bisect/structs.hpp"
-
-// includes, kernels
-#include "viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-////////////////////////////////////////////////////////////////////////////////
-//! Determine eigenvalues for matrices smaller than MAX_SMALL_MATRIX
-//! @param input handles to input data of kernel
-//! @param result handles to result of kernel
-//! @param mat_size matrix size
-//! @param lg lower limit of Gerschgorin interval
-//! @param ug upper limit of Gerschgorin interval
-//! @param precision desired precision of eigenvalues
-////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-void
-computeEigenvaluesSmallMatrix(const InputData<NumericT> &input, ResultDataSmall<NumericT> &result,
- const unsigned int mat_size,
- const NumericT lg, const NumericT ug,
- const NumericT precision)
-{
- viennacl::linalg::detail::bisectSmall( input, result, mat_size, lg, ug, precision);
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-//! Process the result obtained on the device, that is transfer to host and
-//! perform basic sanity checking
-//! @param result handles to result data
-//! @param mat_size matrix size
-////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-void
-processResultSmallMatrix(ResultDataSmall<NumericT> &result,
- const unsigned int mat_size)
-{
- // copy data back to host
- std::vector<NumericT> left(mat_size);
- std::vector<unsigned int> left_count(mat_size);
-
- viennacl::copy(result.vcl_g_left, left);
- viennacl::copy(result.vcl_g_left_count, left_count);
-
- for (unsigned int i = 0; i < mat_size; ++i)
- {
- result.std_eigenvalues[left_count[i]] = left[i];
- }
-}
-} // namespace detail
-} // namespace linalg
-} // namespace viennacl
-#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp
deleted file mode 100755
index 3afa509..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_CONFIG_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_CONFIG_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-
-
-/** @file viennacl/linalg/detail//bisect/config.hpp
- * @brief Global configuration parameters
- *
- * Implementation based on the sample provided with the CUDA 6.0 SDK, for which
- * the creation of derivative works is allowed by including the following statement:
- * "This software contains source code provided by NVIDIA Corporation."
- * */
-
-// should be power of two
-#define VIENNACL_BISECT_MAX_THREADS_BLOCK 256
-
-#ifdef VIENNACL_WITH_OPENCL
-# define VIENNACL_BISECT_MAX_SMALL_MATRIX 256
-# define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX 256
-#else // if CUDA is used
-# define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX 512 // change to 256 if errors occur
-# define VIENNACL_BISECT_MAX_SMALL_MATRIX 512 // change to 256 if errors occur
-#endif
-
- #define VIENNACL_BISECT_MIN_ABS_INTERVAL 5.0e-37
-
-#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp
deleted file mode 100755
index 53cd863..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef _VIENNACL_LINALG_DETAIL_BISECT_GERSCHORIN_HPP_
-#define _VIENNACL_LINALG_DETAIL_BISECT_GERSCHORIN_HPP_
-
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-
-/** @file viennacl/linalg/detail//bisect/gerschgorin.hpp
- @brief Computation of Gerschgorin interval for symmetric, tridiagonal matrix
-
- Implementation based on the sample provided with the CUDA 6.0 SDK, for which
- the creation of derivative works is allowed by including the following statement:
- "This software contains source code provided by NVIDIA Corporation."
-*/
-
-#include <cstdio>
-#include <cstdlib>
-#include <cmath>
-#include <cfloat>
-
-#include "viennacl/linalg/detail/bisect/util.hpp"
-#include "viennacl/vector.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
- ////////////////////////////////////////////////////////////////////////////////
- //! Compute Gerschgorin interval for symmetric, tridiagonal matrix
- //! @param d diagonal elements
- //! @param s superdiagonal elements
- //! @param n size of matrix
- //! @param lg lower limit of Gerschgorin interval
- //! @param ug upper limit of Gerschgorin interval
- ////////////////////////////////////////////////////////////////////////////////
- template<typename NumericT>
- void
- computeGerschgorin(std::vector<NumericT> & d, std::vector<NumericT> & s, unsigned int n, NumericT &lg, NumericT &ug)
- {
- // compute bounds
- for (unsigned int i = 1; i < (n - 1); ++i)
- {
-
- // sum over the absolute values of all elements of row i
- NumericT sum_abs_ni = fabsf(s[i]) + fabsf(s[i + 1]);
-
- lg = min(lg, d[i] - sum_abs_ni);
- ug = max(ug, d[i] + sum_abs_ni);
- }
-
- // first and last row, only one superdiagonal element
-
- // first row
- lg = min(lg, d[0] - fabsf(s[1]));
- ug = max(ug, d[0] + fabsf(s[1]));
-
- // last row
- lg = min(lg, d[n-1] - fabsf(s[n-1]));
- ug = max(ug, d[n-1] + fabsf(s[n-1]));
-
- // increase interval to avoid side effects of fp arithmetic
- NumericT bnorm = max(fabsf(ug), fabsf(lg));
-
- // these values depend on the implmentation of floating count that is
- // employed in the following
- NumericT psi_0 = 11 * FLT_EPSILON * bnorm;
- NumericT psi_n = 11 * FLT_EPSILON * bnorm;
-
- lg = lg - bnorm * 2 * static_cast<NumericT>(n) * FLT_EPSILON - psi_0;
- ug = ug + bnorm * 2 * static_cast<NumericT>(n) * FLT_EPSILON + psi_n;
-
- ug = max(lg, ug);
- }
-} // namespace detail
-} // namespace linalg
-} // namespace viennacl
-#endif // _VIENNACL_LINALG_DETAIL_GERSCHORIN_H_
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp
deleted file mode 100755
index 1943da3..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp
+++ /dev/null
@@ -1,182 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_STRUCTS_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_STRUCTS_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-
-/** @file viennacl/linalg/detail//bisect/structs.hpp
- @brief Helper structures to simplify variable handling
-
- Implementation based on the sample provided with the CUDA 6.0 SDK, for which
- the creation of derivative works is allowed by including the following statement:
- "This software contains source code provided by NVIDIA Corporation."
-*/
-
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#include "viennacl/vector.hpp"
-#include "viennacl/matrix.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-
-/////////////////////////////////////////////////////////////////////////////////
-//! In this class the input matrix is stored
-/////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-struct InputData
-{
- //! host side representation of diagonal
- std::vector<NumericT> std_a;
- //! host side representation superdiagonal
- std::vector<NumericT> std_b;
- //! device side representation of diagonal
- viennacl::vector<NumericT> g_a;
- //!device side representation of superdiagonal
- viennacl::vector<NumericT> g_b;
-
- /** @brief Initialize the input data to the algorithm
- *
- * @param diagonal vector with the diagonal elements
- * @param superdiagonal vector with the superdiagonal elements
- * @param sz size of the matrix
- */
- InputData(std::vector<NumericT> diagonal, std::vector<NumericT> superdiagonal, const unsigned int sz) :
- std_a(sz), std_b(sz), g_a(sz), g_b(sz)
- {
- std_a = diagonal;
- std_b = superdiagonal;
-
- viennacl::copy(std_b, g_b);
- viennacl::copy(std_a, g_a);
- }
-
- InputData(viennacl::vector<NumericT> diagonal, viennacl::vector<NumericT> superdiagonal, const unsigned int sz) :
- std_a(sz), std_b(sz), g_a(sz), g_b(sz)
- {
- g_a = diagonal;
- g_b = superdiagonal;
-
- viennacl::copy(g_a, std_a);
- viennacl::copy(g_b, std_b);
- }
-};
-
-
-/////////////////////////////////////////////////////////////////////////////////
-//! In this class the data of the result for small matrices is stored
-/////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-struct ResultDataSmall
-{
- //! eigenvalues (host side)
- std::vector<NumericT> std_eigenvalues;
- //! left interval limits at the end of the computation
- viennacl::vector<NumericT> vcl_g_left;
- //! right interval limits at the end of the computation
- viennacl::vector<NumericT> vcl_g_right;
- //! number of eigenvalues smaller than the left interval limit
- viennacl::vector<unsigned int> vcl_g_left_count;
- //! number of eigenvalues bigger than the right interval limit
- viennacl::vector<unsigned int> vcl_g_right_count;
-
-
- ////////////////////////////////////////////////////////////////////////////////
- //! Initialize variables and memory for the result for small matrices
- ////////////////////////////////////////////////////////////////////////////////
- ResultDataSmall(const unsigned int mat_size) :
- std_eigenvalues(mat_size), vcl_g_left(mat_size), vcl_g_right(mat_size), vcl_g_left_count(mat_size), vcl_g_right_count(mat_size) {}
-};
-
-
-
-
-
-/////////////////////////////////////////////////////////////////////////////////
-//! In this class the data of the result for large matrices is stored
-/////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-struct ResultDataLarge
-{
-//! eigenvalues
- std::vector<NumericT> std_eigenvalues;
-
- //! number of intervals containing one eigenvalue after the first step
- viennacl::scalar<unsigned int> g_num_one;
-
- //! number of (thread) blocks of intervals containing multiple eigenvalues after the first steo
- viennacl::scalar<unsigned int> g_num_blocks_mult;
-
- //! left interval limits of intervals containing one eigenvalue after the first iteration step
- viennacl::vector<NumericT> g_left_one;
-
- //! right interval limits of intervals containing one eigenvalue after the first iteration step
- viennacl::vector<NumericT> g_right_one;
-
- //! interval indices (position in sorted listed of eigenvalues) of intervals containing one eigenvalue after the first iteration step
- viennacl::vector<unsigned int> g_pos_one;
-
- //! left interval limits of intervals containing multiple eigenvalues after the first iteration step
- viennacl::vector<NumericT> g_left_mult;
- //! right interval limits of intervals containing multiple eigenvalues after the first iteration step
- viennacl::vector<NumericT> g_right_mult;
-
- //! number of eigenvalues less than the left limit of the eigenvalue intervals containing multiple eigenvalues
- viennacl::vector<unsigned int> g_left_count_mult;
-
- //! number of eigenvalues less than the right limit of the eigenvalue intervals containing multiple eigenvalues
- viennacl::vector<unsigned int> g_right_count_mult;
- //! start addresses in g_left_mult etc. of blocks of intervals containing more than one eigenvalue after the first step
- viennacl::vector<unsigned int> g_blocks_mult;
-
- //! accumulated number of intervals in g_left_mult etc. of blocks of intervals containing more than one eigenvalue after the first step
- viennacl::vector<unsigned int> g_blocks_mult_sum;
-
- //! eigenvalues that have been generated in the second step from intervals that still contained multiple eigenvalues after the first step
- viennacl::vector<NumericT> g_lambda_mult;
-
- //! eigenvalue index of intervals that have been generated in the second processing step
- viennacl::vector<unsigned int> g_pos_mult;
-
- /** @brief Initialize variables and memory for result
- *
- * @param mat_size size of the matrix
- */
- ResultDataLarge(unsigned int mat_size) :
- std_eigenvalues(mat_size), g_num_one(0), g_num_blocks_mult(0),
- g_left_one(mat_size), g_right_one(mat_size), g_pos_one(mat_size),
- g_left_mult(mat_size), g_right_mult(mat_size), g_left_count_mult(mat_size), g_right_count_mult(mat_size),
- g_blocks_mult(mat_size), g_blocks_mult_sum(mat_size), g_lambda_mult(mat_size), g_pos_mult(mat_size) {}
-
-};
-} // namespace detail
-} // namespace linalg
-} // namespace viennacl
-#endif // #ifndef VIENNACL_LINALG_DETAIL_STRUCTS_HPP_
-
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp
deleted file mode 100755
index 883d202..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_UTIL_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_UTIL_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-
-/** @file viennacl/linalg/detail//bisect/util.hpp
- @brief Utility functions
-
- Implementation based on the sample provided with the CUDA 6.0 SDK, for which
- the creation of derivative works is allowed by including the following statement:
- "This software contains source code provided by NVIDIA Corporation."
-*/
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-
-////////////////////////////////////////////////////////////////////////////////
-//! Minimum
-////////////////////////////////////////////////////////////////////////////////
-template<class T>
-#ifdef __CUDACC__
-__host__ __device__
-#endif
-T
-min(const T &lhs, const T &rhs)
-{
-
- return (lhs < rhs) ? lhs : rhs;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Maximum
-////////////////////////////////////////////////////////////////////////////////
-template<class T>
-#ifdef __CUDACC__
-__host__ __device__
-#endif
-T
-max(const T &lhs, const T &rhs)
-{
-
- return (lhs < rhs) ? rhs : lhs;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Sign of number (float)
-////////////////////////////////////////////////////////////////////////////////
-#ifdef __CUDACC__
-__host__ __device__
-#endif
-inline float
-sign_f(const float &val)
-{
- return (val < 0.0f) ? -1.0f : 1.0f;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Sign of number (double)
-////////////////////////////////////////////////////////////////////////////////
-#ifdef __CUDACC__
-__host__ __device__
-#endif
-inline double
-sign_d(const double &val)
-{
- return (val < 0.0) ? -1.0 : 1.0;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//! Get the number of blocks that are required to process \a num_threads with
-//! \a num_threads_blocks threads per block
-///////////////////////////////////////////////////////////////////////////////
-extern "C"
-inline
-unsigned int
-getNumBlocksLinear(const unsigned int num_threads,
- const unsigned int num_threads_block)
-{
- const unsigned int block_rem =
- ((num_threads % num_threads_block) != 0) ? 1 : 0;
- return (num_threads / num_threads_block) + block_rem;
-}
-} // namespace detail
-} // namespace linalg
-} // namespace viennacl
-#endif // #ifndef VIENNACL_LINALG_DETAIL_UTIL_HPP_
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp
deleted file mode 100644
index 1540e2d..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp
+++ /dev/null
@@ -1,617 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
-#define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/detail/ilu/block_ilu.hpp
- @brief Implementations of incomplete block factorization preconditioners
-*/
-
-#include <vector>
-#include <cmath>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/detail/ilu/common.hpp"
-#include "viennacl/linalg/detail/ilu/ilu0.hpp"
-#include "viennacl/linalg/detail/ilu/ilut.hpp"
-
-#include <map>
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
- /** @brief Helper range class for representing a subvector of a larger buffer. */
- template<typename VectorT, typename NumericT, typename SizeT = vcl_size_t>
- class ilu_vector_range
- {
- public:
- ilu_vector_range(VectorT & v,
- SizeT start_index,
- SizeT vec_size
- ) : vec_(v), start_(start_index), size_(vec_size) {}
-
- NumericT & operator()(SizeT index)
- {
- assert(index < size_ && bool("Index out of bounds!"));
- return vec_[start_ + index];
- }
-
- NumericT & operator[](SizeT index)
- {
- assert(index < size_ && bool("Index out of bounds!"));
- return vec_[start_ + index];
- }
-
- SizeT size() const { return size_; }
-
- private:
- VectorT & vec_;
- SizeT start_;
- SizeT size_;
- };
-
- /** @brief Extracts a diagonal block from a larger system matrix
- *
- * @param A The full matrix
- * @param diagonal_block_A The output matrix, to which the extracted block is written to
- * @param start_index First row- and column-index of the block
- * @param stop_index First row- and column-index beyond the block
- */
- template<typename NumericT>
- void extract_block_matrix(viennacl::compressed_matrix<NumericT> const & A,
- viennacl::compressed_matrix<NumericT> & diagonal_block_A,
- vcl_size_t start_index,
- vcl_size_t stop_index
- )
- {
- assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
- assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
- assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
-
- NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
- unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
- unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
-
- NumericT * output_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_block_A.handle());
- unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1());
- unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2());
-
- vcl_size_t output_counter = 0;
- for (vcl_size_t row = start_index; row < stop_index; ++row)
- {
- unsigned int buffer_col_start = A_row_buffer[row];
- unsigned int buffer_col_end = A_row_buffer[row+1];
-
- output_row_buffer[row - start_index] = static_cast<unsigned int>(output_counter);
-
- for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
- {
- unsigned int col = A_col_buffer[buf_index];
- if (col < start_index)
- continue;
-
- if (col >= static_cast<unsigned int>(stop_index))
- continue;
-
- output_col_buffer[output_counter] = static_cast<unsigned int>(col - start_index);
- output_elements[output_counter] = A_elements[buf_index];
- ++output_counter;
- }
- output_row_buffer[row - start_index + 1] = static_cast<unsigned int>(output_counter);
- }
- }
-
-} // namespace detail
-
-
-
-/** @brief A block ILU preconditioner class, can be supplied to solve()-routines
- *
- * @tparam MatrixType Type of the system matrix
- * @tparam ILUTag Type of the tag identifiying the ILU preconditioner to be used on each block.
-*/
-template<typename MatrixT, typename ILUTag>
-class block_ilu_precond
-{
-typedef typename MatrixT::value_type ScalarType;
-
-public:
- typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block
-
-
- block_ilu_precond(MatrixT const & mat,
- ILUTag const & tag,
- vcl_size_t num_blocks = 8
- ) : tag_(tag), L_blocks(num_blocks), U_blocks(num_blocks)
- {
- // Set up vector of block indices:
- block_indices_.resize(num_blocks);
- for (vcl_size_t i=0; i<num_blocks; ++i)
- {
- vcl_size_t start_index = ( i * mat.size1()) / num_blocks;
- vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks;
-
- block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
- }
-
- //initialize preconditioner:
- //std::cout << "Start CPU precond" << std::endl;
- init(mat);
- //std::cout << "End CPU precond" << std::endl;
- }
-
- block_ilu_precond(MatrixT const & mat,
- ILUTag const & tag,
- index_vector_type const & block_boundaries
- ) : tag_(tag), block_indices_(block_boundaries), L_blocks(block_boundaries.size()), U_blocks(block_boundaries.size())
- {
- //initialize preconditioner:
- //std::cout << "Start CPU precond" << std::endl;
- init(mat);
- //std::cout << "End CPU precond" << std::endl;
- }
-
-
- template<typename VectorT>
- void apply(VectorT & vec) const
- {
- for (vcl_size_t i=0; i<block_indices_.size(); ++i)
- apply_dispatch(vec, i, ILUTag());
- }
-
-private:
- void init(MatrixT const & A)
- {
- viennacl::context host_context(viennacl::MAIN_MEMORY);
- viennacl::compressed_matrix<ScalarType> mat(host_context);
-
- viennacl::copy(A, mat);
-
- unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
-
-#ifdef VIENNACL_WITH_OPENMP
- #pragma omp parallel for
-#endif
- for (long i2=0; i2<static_cast<long>(block_indices_.size()); ++i2)
- {
- vcl_size_t i = static_cast<vcl_size_t>(i2);
- // Step 1: Extract blocks
- vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first;
- vcl_size_t block_nnz = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first];
- viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context);
-
- detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second);
-
- // Step 2: Precondition blocks:
- viennacl::switch_memory_context(L_blocks[i], host_context);
- viennacl::switch_memory_context(U_blocks[i], host_context);
- init_dispatch(mat_block, L_blocks[i], U_blocks[i], tag_);
- }
-
- }
-
- void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
- viennacl::compressed_matrix<ScalarType> & L,
- viennacl::compressed_matrix<ScalarType> & U,
- viennacl::linalg::ilu0_tag)
- {
- (void)U;
- L = mat_block;
- viennacl::linalg::precondition(L, tag_);
- }
-
- void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
- viennacl::compressed_matrix<ScalarType> & L,
- viennacl::compressed_matrix<ScalarType> & U,
- viennacl::linalg::ilut_tag)
- {
- L.resize(mat_block.size1(), mat_block.size2());
- U.resize(mat_block.size1(), mat_block.size2());
- viennacl::linalg::precondition(mat_block, L, U, tag_);
- }
-
- template<typename VectorT>
- void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilu0_tag) const
- {
- detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].size2());
-
- unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1());
- unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2());
- ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());
-
- viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
- viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), upper_tag());
- }
-
- template<typename VectorT>
- void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilut_tag) const
- {
- detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].size2());
-
- {
- unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1());
- unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2());
- ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());
-
- viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
- }
-
- {
- unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle1());
- unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle2());
- ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U_blocks[i].handle());
-
- viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, U_blocks[i].size2(), upper_tag());
- }
- }
-
- ILUTag tag_;
- index_vector_type block_indices_;
- std::vector< viennacl::compressed_matrix<ScalarType> > L_blocks;
- std::vector< viennacl::compressed_matrix<ScalarType> > U_blocks;
-};
-
-
-
-
-
-/** @brief ILUT preconditioner class, can be supplied to solve()-routines.
-*
-* Specialization for compressed_matrix
-*/
-template<typename NumericT, unsigned int AlignmentV, typename ILUTagT>
-class block_ilu_precond< compressed_matrix<NumericT, AlignmentV>, ILUTagT>
-{
- typedef compressed_matrix<NumericT, AlignmentV> MatrixType;
-
-public:
- typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block
-
-
- block_ilu_precond(MatrixType const & mat,
- ILUTagT const & tag,
- vcl_size_t num_blocks = 8
- ) : tag_(tag),
- block_indices_(num_blocks),
- gpu_block_indices_(),
- gpu_L_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)),
- gpu_U_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)),
- gpu_D_(mat.size1(), viennacl::context(viennacl::MAIN_MEMORY)),
- L_blocks_(num_blocks),
- U_blocks_(num_blocks)
- {
- // Set up vector of block indices:
- block_indices_.resize(num_blocks);
- for (vcl_size_t i=0; i<num_blocks; ++i)
- {
- vcl_size_t start_index = ( i * mat.size1()) / num_blocks;
- vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks;
-
- block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
- }
-
- //initialize preconditioner:
- //std::cout << "Start CPU precond" << std::endl;
- init(mat);
- //std::cout << "End CPU precond" << std::endl;
- }
-
- block_ilu_precond(MatrixType const & mat,
- ILUTagT const & tag,
- index_vector_type const & block_boundaries
- ) : tag_(tag),
- block_indices_(block_boundaries),
- gpu_block_indices_(),
- gpu_L_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)),
- gpu_U_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)),
- gpu_D_(mat.size1(), viennacl::context(viennacl::MAIN_MEMORY)),
- L_blocks_(block_boundaries.size()),
- U_blocks_(block_boundaries.size())
- {
- //initialize preconditioner:
- //std::cout << "Start CPU precond" << std::endl;
- init(mat);
- //std::cout << "End CPU precond" << std::endl;
- }
-
-
- void apply(vector<NumericT> & vec) const
- {
- viennacl::linalg::detail::block_inplace_solve(trans(gpu_L_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_,
- vec,
- viennacl::linalg::unit_lower_tag());
-
- viennacl::linalg::detail::block_inplace_solve(trans(gpu_U_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_,
- vec,
- viennacl::linalg::upper_tag());
-
- //apply_cpu(vec);
- }
-
-
-private:
-
- void init(MatrixType const & A)
- {
- viennacl::context host_context(viennacl::MAIN_MEMORY);
- viennacl::compressed_matrix<NumericT> mat(host_context);
-
- mat = A;
-
- unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
-
-#ifdef VIENNACL_WITH_OPENMP
- #pragma omp parallel for
-#endif
- for (long i=0; i<static_cast<long>(block_indices_.size()); ++i)
- {
- // Step 1: Extract blocks
- vcl_size_t block_size = block_indices_[static_cast<vcl_size_t>(i)].second - block_indices_[static_cast<vcl_size_t>(i)].first;
- vcl_size_t block_nnz = row_buffer[block_indices_[static_cast<vcl_size_t>(i)].second] - row_buffer[block_indices_[static_cast<vcl_size_t>(i)].first];
- viennacl::compressed_matrix<NumericT> mat_block(block_size, block_size, block_nnz, host_context);
-
- detail::extract_block_matrix(mat, mat_block, block_indices_[static_cast<vcl_size_t>(i)].first, block_indices_[static_cast<vcl_size_t>(i)].second);
-
- // Step 2: Precondition blocks:
- viennacl::switch_memory_context(L_blocks_[static_cast<vcl_size_t>(i)], host_context);
- viennacl::switch_memory_context(U_blocks_[static_cast<vcl_size_t>(i)], host_context);
- init_dispatch(mat_block, L_blocks_[static_cast<vcl_size_t>(i)], U_blocks_[static_cast<vcl_size_t>(i)], tag_);
- }
-
- /*
- * copy resulting preconditioner back to GPU:
- */
- viennacl::backend::typesafe_host_array<unsigned int> block_indices_uint(gpu_block_indices_, 2 * block_indices_.size());
- for (vcl_size_t i=0; i<block_indices_.size(); ++i)
- {
- block_indices_uint.set(2*i, block_indices_[i].first);
- block_indices_uint.set(2*i + 1, block_indices_[i].second);
- }
-
- viennacl::backend::memory_create(gpu_block_indices_, block_indices_uint.raw_size(), viennacl::traits::context(A), block_indices_uint.get());
-
- blocks_to_device(A);
-
- }
-
- // Copy computed preconditioned blocks to OpenCL device
- void blocks_to_device(MatrixType const & A)
- {
- gpu_L_trans_.resize(A.size1(), A.size2());
- gpu_U_trans_.resize(A.size1(), A.size2());
- gpu_D_.resize(A.size1());
-
- unsigned int * L_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle1());
- unsigned int * U_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle1());
-
- //
- // Count elements per row
- //
-#ifdef VIENNACL_WITH_OPENMP
- #pragma omp parallel for
-#endif
- for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2)
- {
- vcl_size_t block_index = vcl_size_t(block_index2);
-
- unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first);
- unsigned int block_stop = static_cast<unsigned int>(block_indices_[block_index].second);
-
- unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1());
- unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2());
-
- // zero row array of L:
- std::fill(L_trans_row_buffer + block_start,
- L_trans_row_buffer + block_stop,
- static_cast<unsigned int>(0));
-
- // count number of elements per row:
- for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row)
- {
- unsigned int col_start = L_row_buffer[row];
- unsigned int col_end = L_row_buffer[row+1];
-
- for (unsigned int j = col_start; j < col_end; ++j)
- {
- unsigned int col = L_col_buffer[j];
- if (col < static_cast<unsigned int>(row))
- L_trans_row_buffer[col + block_start] += 1;
- }
- }
-
- ////// same for U
-
- unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1());
- unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2());
-
- // zero row array of U:
- std::fill(U_trans_row_buffer + block_start,
- U_trans_row_buffer + block_stop,
- static_cast<unsigned int>(0));
-
- // count number of elements per row:
- for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row)
- {
- unsigned int col_start = U_row_buffer[row];
- unsigned int col_end = U_row_buffer[row+1];
-
- for (unsigned int j = col_start; j < col_end; ++j)
- {
- unsigned int col = U_col_buffer[j];
- if (col > row)
- U_trans_row_buffer[col + block_start] += 1;
- }
- }
- }
-
-
- //
- // Exclusive scan on row buffer (feel free to add parallelization here)
- //
- unsigned int current_value = 0;
- for (vcl_size_t i=0; i<gpu_L_trans_.size1(); ++i)
- {
- unsigned int tmp = L_trans_row_buffer[i];
- L_trans_row_buffer[i] = current_value;
- current_value += tmp;
- }
- gpu_L_trans_.reserve(current_value);
-
- current_value = 0;
- for (vcl_size_t i=0; i<gpu_U_trans_.size1(); ++i)
- {
- unsigned int tmp = U_trans_row_buffer[i];
- U_trans_row_buffer[i] = current_value;
- current_value += tmp;
- }
- gpu_U_trans_.reserve(current_value);
-
-
- //
- // Fill with data
- //
- unsigned int * L_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle2());
- NumericT * L_trans_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_L_trans_.handle());
-
- unsigned int * U_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle2());
- NumericT * U_trans_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_U_trans_.handle());
-
- NumericT * D_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_D_.handle());
-
- std::vector<unsigned int> offset_L(gpu_L_trans_.size1());
- std::vector<unsigned int> offset_U(gpu_U_trans_.size1());
-
-#ifdef VIENNACL_WITH_OPENMP
- #pragma omp parallel for
-#endif
- for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2)
- {
- vcl_size_t block_index = vcl_size_t(block_index2);
- unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first);
-
- unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1());
- unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2());
- NumericT const * L_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(L_blocks_[block_index].handle());
-
-
- // write L_trans:
- for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row)
- {
- unsigned int col_start = L_row_buffer[row];
- unsigned int col_end = L_row_buffer[row+1];
-
- for (unsigned int j = col_start; j < col_end; ++j)
- {
- unsigned int col = L_col_buffer[j];
- if (col < row)
- {
- unsigned int row_trans = col + block_start;
- unsigned int k = L_trans_row_buffer[row_trans] + offset_L[row_trans];
- offset_L[row_trans] += 1;
-
- L_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start;
- L_trans_elements[k] = L_elements[j];
- }
- }
- }
-
- unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1());
- unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2());
- NumericT const * U_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(U_blocks_[block_index].handle());
-
- // write U_trans and D:
- for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row)
- {
- unsigned int col_start = U_row_buffer[row];
- unsigned int col_end = U_row_buffer[row+1];
-
- for (unsigned int j = col_start; j < col_end; ++j)
- {
- unsigned int row_trans = U_col_buffer[j] + block_start;
- unsigned int k = U_trans_row_buffer[row_trans] + offset_U[row_trans];
-
- if (row_trans == row + block_start) // entry for D
- {
- D_elements[row_trans] = U_elements[j];
- }
- else if (row_trans > row + block_start) //entry for U
- {
- offset_U[row_trans] += 1;
-
- U_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start;
- U_trans_elements[k] = U_elements[j];
- }
- }
- }
-
- }
-
- //
- // Send to destination device:
- //
- viennacl::switch_memory_context(gpu_L_trans_, viennacl::traits::context(A));
- viennacl::switch_memory_context(gpu_U_trans_, viennacl::traits::context(A));
- viennacl::switch_memory_context(gpu_D_, viennacl::traits::context(A));
- }
-
- void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block,
- viennacl::compressed_matrix<NumericT> & L,
- viennacl::compressed_matrix<NumericT> & U,
- viennacl::linalg::ilu0_tag)
- {
- L = mat_block;
- viennacl::linalg::precondition(L, tag_);
- U = L; // fairly poor workaround...
- }
-
- void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block,
- viennacl::compressed_matrix<NumericT> & L,
- viennacl::compressed_matrix<NumericT> & U,
- viennacl::linalg::ilut_tag)
- {
- L.resize(mat_block.size1(), mat_block.size2());
- U.resize(mat_block.size1(), mat_block.size2());
- viennacl::linalg::precondition(mat_block, L, U, tag_);
- }
-
-
- ILUTagT tag_;
- index_vector_type block_indices_;
- viennacl::backend::mem_handle gpu_block_indices_;
- viennacl::compressed_matrix<NumericT> gpu_L_trans_;
- viennacl::compressed_matrix<NumericT> gpu_U_trans_;
- viennacl::vector<NumericT> gpu_D_;
-
- std::vector<MatrixType> L_blocks_;
- std::vector<MatrixType> U_blocks_;
-};
-
-
-}
-}
-
-
-
-
-#endif
-
-
-
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp
deleted file mode 100644
index 7628cdb..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp
+++ /dev/null
@@ -1,316 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_CHOW_PATEL_ILU_HPP_
-#define VIENNACL_LINALG_DETAIL_CHOW_PATEL_ILU_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2016, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
- Portions of this software are copyright by UChicago Argonne, LLC.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp@iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/detail/ilu/chow_patel_ilu.hpp
- @brief Implementations of incomplete factorization preconditioners with fine-grained parallelism.
-
- Based on "Fine-Grained Parallel Incomplete LU Factorization" by Chow and Patel, SIAM J. Sci. Comput., vol. 37, no. 2, pp. C169-C193
-*/
-
-#include <vector>
-#include <cmath>
-#include <iostream>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/detail/ilu/common.hpp"
-#include "viennacl/linalg/ilu_operations.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/backend/memory.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-
-/** @brief A tag for incomplete LU and incomplete Cholesky factorization with static pattern (Parallel-ILU0, Parallel ICC0)
-*/
-class chow_patel_tag
-{
-public:
- /** @brief Constructor allowing to set the number of sweeps and Jacobi iterations.
- *
- * @param num_sweeps Number of sweeps in setup phase
- * @param num_jacobi_iters Number of Jacobi iterations for each triangular 'solve' when applying the preconditioner to a vector
- */
- chow_patel_tag(vcl_size_t num_sweeps = 3, vcl_size_t num_jacobi_iters = 2) : sweeps_(num_sweeps), jacobi_iters_(num_jacobi_iters) {}
-
- /** @brief Returns the number of sweeps (i.e. number of nonlinear iterations) in the solver setup stage */
- vcl_size_t sweeps() const { return sweeps_; }
- /** @brief Sets the number of sweeps (i.e. number of nonlinear iterations) in the solver setup stage */
- void sweeps(vcl_size_t num) { sweeps_ = num; }
-
- /** @brief Returns the number of Jacobi iterations (i.e. applications of x_{k+1} = (I - D^{-1}R)x_k + D^{-1} b) for each of the solves y = U^{-1} x and z = L^{-1} y) for each preconditioner application. */
- vcl_size_t jacobi_iters() const { return jacobi_iters_; }
- /** @brief Sets the number of Jacobi iterations for each triangular 'solve' when applying the preconditioner to a vector. */
- void jacobi_iters(vcl_size_t num) { jacobi_iters_ = num; }
-
-private:
- vcl_size_t sweeps_;
- vcl_size_t jacobi_iters_;
-};
-
-namespace detail
-{
- /** @brief Implementation of the parallel ICC0 factorization, Algorithm 3 in Chow-Patel paper.
- *
- * Rather than dealing with a column-major upper triangular matrix U, we use the lower-triangular matrix L such that A is approximately given by LL^T.
- * The advantage is that L is readily available in row-major format.
- */
- template<typename NumericT>
- void precondition(viennacl::compressed_matrix<NumericT> const & A,
- viennacl::compressed_matrix<NumericT> & L,
- viennacl::vector<NumericT> & diag_L,
- viennacl::compressed_matrix<NumericT> & L_trans,
- chow_patel_tag const & tag)
- {
- // make sure L and U have correct dimensions:
- L.resize(A.size1(), A.size2(), false);
-
- // initialize L and U from values in A:
- viennacl::linalg::extract_L(A, L);
-
- // diagonally scale values from A in L:
- viennacl::linalg::icc_scale(A, L);
-
- viennacl::vector<NumericT> aij_L(L.nnz(), viennacl::traits::context(A));
- viennacl::backend::memory_copy(L.handle(), aij_L.handle(), 0, 0, sizeof(NumericT) * L.nnz());
-
- // run sweeps:
- for (vcl_size_t i=0; i<tag.sweeps(); ++i)
- viennacl::linalg::icc_chow_patel_sweep(L, aij_L);
-
- // transpose L to obtain L_trans:
- viennacl::linalg::ilu_transpose(L, L_trans);
-
- // form (I - D_L^{-1}L) and (I - D_U^{-1} U), with U := L_trans
- viennacl::linalg::ilu_form_neumann_matrix(L, diag_L);
- viennacl::linalg::ilu_form_neumann_matrix(L_trans, diag_L);
- }
-
-
- /** @brief Implementation of the parallel ILU0 factorization, Algorithm 2 in Chow-Patel paper. */
- template<typename NumericT>
- void precondition(viennacl::compressed_matrix<NumericT> const & A,
- viennacl::compressed_matrix<NumericT> & L,
- viennacl::vector<NumericT> & diag_L,
- viennacl::compressed_matrix<NumericT> & U,
- viennacl::vector<NumericT> & diag_U,
- chow_patel_tag const & tag)
- {
- // make sure L and U have correct dimensions:
- L.resize(A.size1(), A.size2(), false);
- U.resize(A.size1(), A.size2(), false);
-
- // initialize L and U from values in A:
- viennacl::linalg::extract_LU(A, L, U);
-
- // diagonally scale values from A in L and U:
- viennacl::linalg::ilu_scale(A, L, U);
-
- // transpose storage layout of U from CSR to CSC via transposition
- viennacl::compressed_matrix<NumericT> U_trans;
- viennacl::linalg::ilu_transpose(U, U_trans);
-
- // keep entries of a_ij for the sweeps
- viennacl::vector<NumericT> aij_L (L.nnz(), viennacl::traits::context(A));
- viennacl::vector<NumericT> aij_U_trans(U_trans.nnz(), viennacl::traits::context(A));
-
- viennacl::backend::memory_copy( L.handle(), aij_L.handle(), 0, 0, sizeof(NumericT) * L.nnz());
- viennacl::backend::memory_copy(U_trans.handle(), aij_U_trans.handle(), 0, 0, sizeof(NumericT) * U_trans.nnz());
-
- // run sweeps:
- for (vcl_size_t i=0; i<tag.sweeps(); ++i)
- viennacl::linalg::ilu_chow_patel_sweep(L, aij_L, U_trans, aij_U_trans);
-
- // transpose U_trans back:
- viennacl::linalg::ilu_transpose(U_trans, U);
-
- // form (I - D_L^{-1}L) and (I - D_U^{-1} U)
- viennacl::linalg::ilu_form_neumann_matrix(L, diag_L);
- viennacl::linalg::ilu_form_neumann_matrix(U, diag_U);
- }
-
-}
-
-
-
-
-/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines
-*/
-template<typename MatrixT>
-class chow_patel_icc_precond
-{
- // only works with compressed_matrix!
- typedef typename MatrixT::CHOW_PATEL_ICC_ONLY_WORKS_WITH_COMPRESSED_MATRIX error_type;
-};
-
-
-/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines.
-*
-* Specialization for compressed_matrix
-*/
-template<typename NumericT, unsigned int AlignmentV>
-class chow_patel_icc_precond< viennacl::compressed_matrix<NumericT, AlignmentV> >
-{
-
-public:
- chow_patel_icc_precond(viennacl::compressed_matrix<NumericT, AlignmentV> const & A, chow_patel_tag const & tag)
- : tag_(tag),
- L_(0, 0, 0, viennacl::traits::context(A)),
- diag_L_(A.size1(), viennacl::traits::context(A)),
- L_trans_(0, 0, 0, viennacl::traits::context(A)),
- x_k_(A.size1(), viennacl::traits::context(A)),
- b_(A.size1(), viennacl::traits::context(A))
- {
- viennacl::linalg::detail::precondition(A, L_, diag_L_, L_trans_, tag_);
- }
-
- /** @brief Preconditioner application: LL^Tx = b, computed via Ly = b, L^Tx = y using Jacobi iterations.
- *
- * L contains (I - D_L^{-1}L), L_trans contains (I - D_L^{-1}L^T) where D denotes the respective diagonal matrix
- */
- template<typename VectorT>
- void apply(VectorT & vec) const
- {
- //
- // y = L^{-1} b through Jacobi iteration y_{k+1} = (I - D^{-1}L)y_k + D^{-1}x
- //
- b_ = viennacl::linalg::element_div(vec, diag_L_);
- x_k_ = b_;
- for (unsigned int i=0; i<tag_.jacobi_iters(); ++i)
- {
- vec = viennacl::linalg::prod(L_, x_k_);
- x_k_ = vec + b_;
- }
-
- //
- // x = U^{-1} y through Jacobi iteration x_{k+1} = (I - D^{-1}L^T)x_k + D^{-1}b
- //
- b_ = viennacl::linalg::element_div(x_k_, diag_L_);
- x_k_ = b_; // x_1 if x_0 \equiv 0
- for (unsigned int i=0; i<tag_.jacobi_iters(); ++i)
- {
- vec = viennacl::linalg::prod(L_trans_, x_k_);
- x_k_ = vec + b_;
- }
-
- // return result:
- vec = x_k_;
- }
-
-private:
- chow_patel_tag tag_;
- viennacl::compressed_matrix<NumericT> L_;
- viennacl::vector<NumericT> diag_L_;
- viennacl::compressed_matrix<NumericT> L_trans_;
-
- mutable viennacl::vector<NumericT> x_k_;
- mutable viennacl::vector<NumericT> b_;
-};
-
-
-
-
-
-
-/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines
-*/
-template<typename MatrixT>
-class chow_patel_ilu_precond
-{
- // only works with compressed_matrix!
- typedef typename MatrixT::CHOW_PATEL_ILU_ONLY_WORKS_WITH_COMPRESSED_MATRIX error_type;
-};
-
-
-/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines.
-*
-* Specialization for compressed_matrix
-*/
-template<typename NumericT, unsigned int AlignmentV>
-class chow_patel_ilu_precond< viennacl::compressed_matrix<NumericT, AlignmentV> >
-{
-
-public:
- chow_patel_ilu_precond(viennacl::compressed_matrix<NumericT, AlignmentV> const & A, chow_patel_tag const & tag)
- : tag_(tag),
- L_(0, 0, 0, viennacl::traits::context(A)),
- diag_L_(A.size1(), viennacl::traits::context(A)),
- U_(0, 0, 0, viennacl::traits::context(A)),
- diag_U_(A.size1(), viennacl::traits::context(A)),
- x_k_(A.size1(), viennacl::traits::context(A)),
- b_(A.size1(), viennacl::traits::context(A))
- {
- viennacl::linalg::detail::precondition(A, L_, diag_L_, U_, diag_U_, tag_);
- }
-
- /** @brief Preconditioner application: LUx = b, computed via Ly = b, Ux = y using Jacobi iterations.
- *
- * L_ contains (I - D_L^{-1}L), U_ contains (I - D_U^{-1}U) where D denotes the respective diagonal matrix
- */
- template<typename VectorT>
- void apply(VectorT & vec) const
- {
- //
- // y = L^{-1} b through Jacobi iteration y_{k+1} = (I - D^{-1}L)y_k + D^{-1}x
- //
- b_ = viennacl::linalg::element_div(vec, diag_L_);
- x_k_ = b_;
- for (unsigned int i=0; i<tag_.jacobi_iters(); ++i)
- {
- vec = viennacl::linalg::prod(L_, x_k_);
- x_k_ = vec + b_;
- }
-
- //
- // x = U^{-1} y through Jacobi iteration x_{k+1} = (I - D^{-1}U)x_k + D^{-1}b
- //
- b_ = viennacl::linalg::element_div(x_k_, diag_U_);
- x_k_ = b_; // x_1 if x_0 \equiv 0
- for (unsigned int i=0; i<tag_.jacobi_iters(); ++i)
- {
- vec = viennacl::linalg::prod(U_, x_k_);
- x_k_ = vec + b_;
- }
-
- // return result:
- vec = x_k_;
- }
-
-private:
- chow_patel_tag tag_;
- viennacl::compressed_matrix<NumericT> L_;
- viennacl::vector<NumericT> diag_L_;
- viennacl::compressed_matrix<NumericT> U_;
- viennacl::vector<NumericT> diag_U_;
-
- mutable viennacl::vector<NumericT> x_k_;
- mutable viennacl::vector<NumericT> b_;
-};
-
-
-} // namespace linalg
-} // namespace viennacl
-
-
-#endif
-
-
-