You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ap...@apache.org on 2016/06/08 21:40:23 UTC
[26/51] [partial] mahout git commit: (nojira) add native-viennaCL
module to codebase. closes apache/mahout#241
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp
new file mode 100644
index 0000000..93b0cba
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp
@@ -0,0 +1,263 @@
+#ifndef VIENNACL_LINALG_DETAIL_ILU_COMMON_HPP_
+#define VIENNACL_LINALG_DETAIL_ILU_COMMON_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/detail/ilu/common.hpp
+ @brief Common routines used within ILU-type preconditioners
+*/
+
+#include <vector>
+#include <cmath>
+#include <iostream>
+#include <map>
+#include <list>
+
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/backend/memory.hpp"
+
+#include "viennacl/linalg/host_based/common.hpp"
+#include "viennacl/linalg/misc_operations.hpp"
+
+namespace viennacl
+{
+namespace linalg
+{
+namespace detail
+{
+
+
+//
+// Level Scheduling Setup for ILU:
+//
+
+template<typename NumericT, unsigned int AlignmentV>
+void level_scheduling_setup_impl(viennacl::compressed_matrix<NumericT, AlignmentV> const & LU,
+ viennacl::vector<NumericT> const & diagonal_LU,
+ std::list<viennacl::backend::mem_handle> & row_index_arrays,
+ std::list<viennacl::backend::mem_handle> & row_buffers,
+ std::list<viennacl::backend::mem_handle> & col_buffers,
+ std::list<viennacl::backend::mem_handle> & element_buffers,
+ std::list<vcl_size_t> & row_elimination_num_list,
+ bool setup_U)
+{
+ NumericT const * diagonal_buf = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_LU.handle());
+ NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(LU.handle());
+ unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU.handle1());
+ unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU.handle2());
+
+ //
+ // Step 1: Determine row elimination order for each row and build up meta information about the number of entries taking part in each elimination step:
+ //
+ std::vector<vcl_size_t> row_elimination(LU.size1());
+ std::map<vcl_size_t, std::map<vcl_size_t, vcl_size_t> > row_entries_per_elimination_step;
+
+ vcl_size_t max_elimination_runs = 0;
+ for (vcl_size_t row2 = 0; row2 < LU.size1(); ++row2)
+ {
+ vcl_size_t row = setup_U ? (LU.size1() - row2) - 1 : row2;
+
+ vcl_size_t row_begin = row_buffer[row];
+ vcl_size_t row_end = row_buffer[row+1];
+ vcl_size_t elimination_index = 0; //Note: first run corresponds to elimination_index = 1 (otherwise, type issues with int <-> unsigned int would arise
+ for (vcl_size_t i = row_begin; i < row_end; ++i)
+ {
+ unsigned int col = col_buffer[i];
+ if ( (!setup_U && col < row) || (setup_U && col > row) )
+ {
+ elimination_index = std::max<vcl_size_t>(elimination_index, row_elimination[col]);
+ row_entries_per_elimination_step[row_elimination[col]][row] += 1;
+ }
+ }
+ row_elimination[row] = elimination_index + 1;
+ max_elimination_runs = std::max<vcl_size_t>(max_elimination_runs, elimination_index + 1);
+ }
+
+ //std::cout << "Number of elimination runs: " << max_elimination_runs << std::endl;
+
+ //
+ // Step 2: Build row-major elimination matrix for each elimination step
+ //
+
+ //std::cout << "Elimination order: " << std::endl;
+ //for (vcl_size_t i=0; i<row_elimination.size(); ++i)
+ // std::cout << row_elimination[i] << ", ";
+ //std::cout << std::endl;
+
+ //vcl_size_t summed_rows = 0;
+ for (vcl_size_t elimination_run = 1; elimination_run <= max_elimination_runs; ++elimination_run)
+ {
+ std::map<vcl_size_t, vcl_size_t> const & current_elimination_info = row_entries_per_elimination_step[elimination_run];
+
+ // count cols and entries handled in this elimination step
+ vcl_size_t num_tainted_cols = current_elimination_info.size();
+ vcl_size_t num_entries = 0;
+
+ for (std::map<vcl_size_t, vcl_size_t>::const_iterator it = current_elimination_info.begin();
+ it != current_elimination_info.end();
+ ++it)
+ num_entries += it->second;
+
+ //std::cout << "num_entries: " << num_entries << std::endl;
+ //std::cout << "num_tainted_cols: " << num_tainted_cols << std::endl;
+
+ if (num_tainted_cols > 0)
+ {
+ row_index_arrays.push_back(viennacl::backend::mem_handle());
+ viennacl::backend::switch_memory_context<unsigned int>(row_index_arrays.back(), viennacl::traits::context(LU));
+ viennacl::backend::typesafe_host_array<unsigned int> elim_row_index_array(row_index_arrays.back(), num_tainted_cols);
+
+ row_buffers.push_back(viennacl::backend::mem_handle());
+ viennacl::backend::switch_memory_context<unsigned int>(row_buffers.back(), viennacl::traits::context(LU));
+ viennacl::backend::typesafe_host_array<unsigned int> elim_row_buffer(row_buffers.back(), num_tainted_cols + 1);
+
+ col_buffers.push_back(viennacl::backend::mem_handle());
+ viennacl::backend::switch_memory_context<unsigned int>(col_buffers.back(), viennacl::traits::context(LU));
+ viennacl::backend::typesafe_host_array<unsigned int> elim_col_buffer(col_buffers.back(), num_entries);
+
+ element_buffers.push_back(viennacl::backend::mem_handle());
+ viennacl::backend::switch_memory_context<NumericT>(element_buffers.back(), viennacl::traits::context(LU));
+ std::vector<NumericT> elim_elements_buffer(num_entries);
+
+ row_elimination_num_list.push_back(num_tainted_cols);
+
+ vcl_size_t k=0;
+ vcl_size_t nnz_index = 0;
+ elim_row_buffer.set(0, 0);
+
+ for (std::map<vcl_size_t, vcl_size_t>::const_iterator it = current_elimination_info.begin();
+ it != current_elimination_info.end();
+ ++it)
+ {
+ //vcl_size_t col = setup_U ? (elimination_matrix.size() - it->first) - 1 : col2;
+ vcl_size_t row = it->first;
+ elim_row_index_array.set(k, row);
+
+ vcl_size_t row_begin = row_buffer[row];
+ vcl_size_t row_end = row_buffer[row+1];
+ for (vcl_size_t i = row_begin; i < row_end; ++i)
+ {
+ unsigned int col = col_buffer[i];
+ if ( (!setup_U && col < row) || (setup_U && col > row) ) //entry of L/U
+ {
+ if (row_elimination[col] == elimination_run) // this entry is substituted in this run
+ {
+ elim_col_buffer.set(nnz_index, col);
+ elim_elements_buffer[nnz_index] = setup_U ? elements[i] / diagonal_buf[it->first] : elements[i];
+ ++nnz_index;
+ }
+ }
+ }
+
+ elim_row_buffer.set(++k, nnz_index);
+ }
+
+ //
+ // Wrap in memory_handles:
+ //
+ viennacl::backend::memory_create(row_index_arrays.back(), elim_row_index_array.raw_size(), viennacl::traits::context(row_index_arrays.back()), elim_row_index_array.get());
+ viennacl::backend::memory_create(row_buffers.back(), elim_row_buffer.raw_size(), viennacl::traits::context(row_buffers.back()), elim_row_buffer.get());
+ viennacl::backend::memory_create(col_buffers.back(), elim_col_buffer.raw_size(), viennacl::traits::context(col_buffers.back()), elim_col_buffer.get());
+ viennacl::backend::memory_create(element_buffers.back(), sizeof(NumericT) * elim_elements_buffer.size(), viennacl::traits::context(element_buffers.back()), &(elim_elements_buffer[0]));
+ }
+
+ // Print some info:
+ //std::cout << "Eliminated columns in run " << elimination_run << ": " << num_tainted_cols << " (tainted columns: " << num_tainted_cols << ")" << std::endl;
+ //summed_rows += eliminated_rows_in_run;
+ //if (eliminated_rows_in_run == 0)
+ // break;
+ }
+ //std::cout << "Eliminated rows: " << summed_rows << " out of " << row_elimination.size() << std::endl;
+}
+
+
+template<typename NumericT, unsigned int AlignmentV>
+void level_scheduling_setup_L(viennacl::compressed_matrix<NumericT, AlignmentV> const & LU,
+ viennacl::vector<NumericT> const & diagonal_LU,
+ std::list<viennacl::backend::mem_handle> & row_index_arrays,
+ std::list<viennacl::backend::mem_handle> & row_buffers,
+ std::list<viennacl::backend::mem_handle> & col_buffers,
+ std::list<viennacl::backend::mem_handle> & element_buffers,
+ std::list<vcl_size_t> & row_elimination_num_list)
+{
+ level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, false);
+}
+
+
+//
+// Multifrontal setup of U:
+//
+
+template<typename NumericT, unsigned int AlignmentV>
+void level_scheduling_setup_U(viennacl::compressed_matrix<NumericT, AlignmentV> const & LU,
+ viennacl::vector<NumericT> const & diagonal_LU,
+ std::list<viennacl::backend::mem_handle> & row_index_arrays,
+ std::list<viennacl::backend::mem_handle> & row_buffers,
+ std::list<viennacl::backend::mem_handle> & col_buffers,
+ std::list<viennacl::backend::mem_handle> & element_buffers,
+ std::list<vcl_size_t> & row_elimination_num_list)
+{
+ level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, true);
+}
+
+
+//
+// Multifrontal substitution (both L and U). Will partly be moved to single_threaded/opencl/cuda implementations
+//
+template<typename NumericT>
+void level_scheduling_substitute(viennacl::vector<NumericT> & vec,
+ std::list<viennacl::backend::mem_handle> const & row_index_arrays,
+ std::list<viennacl::backend::mem_handle> const & row_buffers,
+ std::list<viennacl::backend::mem_handle> const & col_buffers,
+ std::list<viennacl::backend::mem_handle> const & element_buffers,
+ std::list<vcl_size_t> const & row_elimination_num_list)
+{
+ typedef typename std::list< viennacl::backend::mem_handle >::const_iterator ListIterator;
+ ListIterator row_index_array_it = row_index_arrays.begin();
+ ListIterator row_buffers_it = row_buffers.begin();
+ ListIterator col_buffers_it = col_buffers.begin();
+ ListIterator element_buffers_it = element_buffers.begin();
+ typename std::list< vcl_size_t>::const_iterator row_elimination_num_it = row_elimination_num_list.begin();
+ for (vcl_size_t i=0; i<row_index_arrays.size(); ++i)
+ {
+ viennacl::linalg::detail::level_scheduling_substitute(vec, *row_index_array_it, *row_buffers_it, *col_buffers_it, *element_buffers_it, *row_elimination_num_it);
+
+ ++row_index_array_it;
+ ++row_buffers_it;
+ ++col_buffers_it;
+ ++element_buffers_it;
+ ++row_elimination_num_it;
+ }
+}
+
+
+
+
+
+} // namespace detail
+} // namespace linalg
+} // namespace viennacl
+
+
+
+
+#endif
+
+
+
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp
new file mode 100644
index 0000000..1c3191a
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp
@@ -0,0 +1,379 @@
+#ifndef VIENNACL_LINALG_DETAIL_ILU0_HPP_
+#define VIENNACL_LINALG_DETAIL_ILU0_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/detail/ilu/ilu0.hpp
+ @brief Implementations of incomplete factorization preconditioners with static nonzero pattern.
+
+ Contributed by Evan Bollig.
+
+ ILU0 (Incomplete LU with zero fill-in)
+ - All preconditioner nonzeros exist at locations that were nonzero in the input matrix.
+ - The number of nonzeros in the output preconditioner are exactly the same number as the input matrix
+
+ Evan Bollig 3/30/12
+
+ Adapted from viennacl/linalg/detail/ilut.hpp
+
+ Low-level reimplementation by Karl Rupp in Nov 2012, increasing performance substantially. Also added level-scheduling.
+
+*/
+
+#include <vector>
+#include <cmath>
+#include <iostream>
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/linalg/detail/ilu/common.hpp"
+#include "viennacl/compressed_matrix.hpp"
+#include "viennacl/backend/memory.hpp"
+
+#include "viennacl/linalg/host_based/common.hpp"
+
+#include <map>
+
+namespace viennacl
+{
+namespace linalg
+{
+
+/** @brief A tag for incomplete LU factorization with static pattern (ILU0)
+*/
+class ilu0_tag
+{
+public:
+ ilu0_tag(bool with_level_scheduling = false) : use_level_scheduling_(with_level_scheduling) {}
+
+ bool use_level_scheduling() const { return use_level_scheduling_; }
+ void use_level_scheduling(bool b) { use_level_scheduling_ = b; }
+
+private:
+ bool use_level_scheduling_;
+};
+
+
+/** @brief Implementation of a ILU-preconditioner with static pattern. Optimized version for CSR matrices.
+ *
+ * refer to the Algorithm in Saad's book (1996 edition)
+ *
+ * @param A The sparse matrix matrix. The result is directly written to A.
+ */
+template<typename NumericT>
+void precondition(viennacl::compressed_matrix<NumericT> & A, ilu0_tag const & /* tag */)
+{
+ assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
+ assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
+ assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
+
+ NumericT * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
+ unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
+ unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
+
+ // Note: Line numbers in the following refer to the algorithm in Saad's book
+
+ for (vcl_size_t i=1; i<A.size1(); ++i) // Line 1
+ {
+ unsigned int row_i_begin = row_buffer[i];
+ unsigned int row_i_end = row_buffer[i+1];
+ for (unsigned int buf_index_k = row_i_begin; buf_index_k < row_i_end; ++buf_index_k) //Note: We do not assume that the column indices within a row are sorted
+ {
+ unsigned int k = col_buffer[buf_index_k];
+ if (k >= i)
+ continue; //Note: We do not assume that the column indices within a row are sorted
+
+ unsigned int row_k_begin = row_buffer[k];
+ unsigned int row_k_end = row_buffer[k+1];
+
+ // get a_kk:
+ NumericT a_kk = 0;
+ for (unsigned int buf_index_akk = row_k_begin; buf_index_akk < row_k_end; ++buf_index_akk)
+ {
+ if (col_buffer[buf_index_akk] == k)
+ {
+ a_kk = elements[buf_index_akk];
+ break;
+ }
+ }
+
+ NumericT & a_ik = elements[buf_index_k];
+ a_ik /= a_kk; //Line 3
+
+ for (unsigned int buf_index_j = row_i_begin; buf_index_j < row_i_end; ++buf_index_j) //Note: We do not assume that the column indices within a row are sorted
+ {
+ unsigned int j = col_buffer[buf_index_j];
+ if (j <= k)
+ continue;
+
+ // determine a_kj:
+ NumericT a_kj = 0;
+ for (unsigned int buf_index_akj = row_k_begin; buf_index_akj < row_k_end; ++buf_index_akj)
+ {
+ if (col_buffer[buf_index_akj] == j)
+ {
+ a_kj = elements[buf_index_akj];
+ break;
+ }
+ }
+
+ //a_ij -= a_ik * a_kj
+ elements[buf_index_j] -= a_ik * a_kj; //Line 5
+ }
+ }
+ }
+
+}
+
+
+/** @brief ILU0 preconditioner class, can be supplied to solve()-routines
+*/
+template<typename MatrixT>
+class ilu0_precond
+{
+ typedef typename MatrixT::value_type NumericType;
+
+public:
+ ilu0_precond(MatrixT const & mat, ilu0_tag const & tag) : tag_(tag), LU_()
+ {
+ //initialize preconditioner:
+ //std::cout << "Start CPU precond" << std::endl;
+ init(mat);
+ //std::cout << "End CPU precond" << std::endl;
+ }
+
+ template<typename VectorT>
+ void apply(VectorT & vec) const
+ {
+ unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_.handle1());
+ unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_.handle2());
+ NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(LU_.handle());
+
+ viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, LU_.size2(), unit_lower_tag());
+ viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, LU_.size2(), upper_tag());
+ }
+
+private:
+ void init(MatrixT const & mat)
+ {
+ viennacl::context host_context(viennacl::MAIN_MEMORY);
+ viennacl::switch_memory_context(LU_, host_context);
+
+ viennacl::copy(mat, LU_);
+ viennacl::linalg::precondition(LU_, tag_);
+ }
+
+ ilu0_tag tag_;
+ viennacl::compressed_matrix<NumericType> LU_;
+};
+
+
+/** @brief ILU0 preconditioner class, can be supplied to solve()-routines.
+*
+* Specialization for compressed_matrix
+*/
+template<typename NumericT, unsigned int AlignmentV>
+class ilu0_precond< viennacl::compressed_matrix<NumericT, AlignmentV> >
+{
+ typedef viennacl::compressed_matrix<NumericT, AlignmentV> MatrixType;
+
+public:
+ ilu0_precond(MatrixType const & mat, ilu0_tag const & tag)
+ : tag_(tag),
+ LU_(mat.size1(), mat.size2(), viennacl::traits::context(mat))
+ {
+ //initialize preconditioner:
+ //std::cout << "Start GPU precond" << std::endl;
+ init(mat);
+ //std::cout << "End GPU precond" << std::endl;
+ }
+
+ void apply(viennacl::vector<NumericT> & vec) const
+ {
+ viennacl::context host_context(viennacl::MAIN_MEMORY);
+ if (vec.handle().get_active_handle_id() != viennacl::MAIN_MEMORY)
+ {
+ if (tag_.use_level_scheduling())
+ {
+ //std::cout << "Using multifrontal on GPU..." << std::endl;
+ detail::level_scheduling_substitute(vec,
+ multifrontal_L_row_index_arrays_,
+ multifrontal_L_row_buffers_,
+ multifrontal_L_col_buffers_,
+ multifrontal_L_element_buffers_,
+ multifrontal_L_row_elimination_num_list_);
+
+ vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_);
+
+ detail::level_scheduling_substitute(vec,
+ multifrontal_U_row_index_arrays_,
+ multifrontal_U_row_buffers_,
+ multifrontal_U_col_buffers_,
+ multifrontal_U_element_buffers_,
+ multifrontal_U_row_elimination_num_list_);
+ }
+ else
+ {
+ viennacl::context old_context = viennacl::traits::context(vec);
+ viennacl::switch_memory_context(vec, host_context);
+ viennacl::linalg::inplace_solve(LU_, vec, unit_lower_tag());
+ viennacl::linalg::inplace_solve(LU_, vec, upper_tag());
+ viennacl::switch_memory_context(vec, old_context);
+ }
+ }
+ else //apply ILU0 directly on CPU
+ {
+ if (tag_.use_level_scheduling())
+ {
+ //std::cout << "Using multifrontal..." << std::endl;
+ detail::level_scheduling_substitute(vec,
+ multifrontal_L_row_index_arrays_,
+ multifrontal_L_row_buffers_,
+ multifrontal_L_col_buffers_,
+ multifrontal_L_element_buffers_,
+ multifrontal_L_row_elimination_num_list_);
+
+ vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_);
+
+ detail::level_scheduling_substitute(vec,
+ multifrontal_U_row_index_arrays_,
+ multifrontal_U_row_buffers_,
+ multifrontal_U_col_buffers_,
+ multifrontal_U_element_buffers_,
+ multifrontal_U_row_elimination_num_list_);
+ }
+ else
+ {
+ viennacl::linalg::inplace_solve(LU_, vec, unit_lower_tag());
+ viennacl::linalg::inplace_solve(LU_, vec, upper_tag());
+ }
+ }
+ }
+
+ vcl_size_t levels() const { return multifrontal_L_row_index_arrays_.size(); }
+
+private:
+ void init(MatrixType const & mat)
+ {
+ viennacl::context host_context(viennacl::MAIN_MEMORY);
+ viennacl::switch_memory_context(LU_, host_context);
+ LU_ = mat;
+ viennacl::linalg::precondition(LU_, tag_);
+
+ if (!tag_.use_level_scheduling())
+ return;
+
+ // multifrontal part:
+ viennacl::switch_memory_context(multifrontal_U_diagonal_, host_context);
+ multifrontal_U_diagonal_.resize(LU_.size1(), false);
+ host_based::detail::row_info(LU_, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL);
+
+ detail::level_scheduling_setup_L(LU_,
+ multifrontal_U_diagonal_, //dummy
+ multifrontal_L_row_index_arrays_,
+ multifrontal_L_row_buffers_,
+ multifrontal_L_col_buffers_,
+ multifrontal_L_element_buffers_,
+ multifrontal_L_row_elimination_num_list_);
+
+
+ detail::level_scheduling_setup_U(LU_,
+ multifrontal_U_diagonal_,
+ multifrontal_U_row_index_arrays_,
+ multifrontal_U_row_buffers_,
+ multifrontal_U_col_buffers_,
+ multifrontal_U_element_buffers_,
+ multifrontal_U_row_elimination_num_list_);
+
+ //
+ // Bring to device if necessary:
+ //
+
+ // L:
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin();
+ it != multifrontal_L_row_index_arrays_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin();
+ it != multifrontal_L_row_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin();
+ it != multifrontal_L_col_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin();
+ it != multifrontal_L_element_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat));
+
+
+ // U:
+
+ viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin();
+ it != multifrontal_U_row_index_arrays_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin();
+ it != multifrontal_U_row_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin();
+ it != multifrontal_U_col_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin();
+ it != multifrontal_U_element_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat));
+
+ }
+
+ ilu0_tag tag_;
+ viennacl::compressed_matrix<NumericT> LU_;
+
+ std::list<viennacl::backend::mem_handle> multifrontal_L_row_index_arrays_;
+ std::list<viennacl::backend::mem_handle> multifrontal_L_row_buffers_;
+ std::list<viennacl::backend::mem_handle> multifrontal_L_col_buffers_;
+ std::list<viennacl::backend::mem_handle> multifrontal_L_element_buffers_;
+ std::list<vcl_size_t> multifrontal_L_row_elimination_num_list_;
+
+ viennacl::vector<NumericT> multifrontal_U_diagonal_;
+ std::list<viennacl::backend::mem_handle> multifrontal_U_row_index_arrays_;
+ std::list<viennacl::backend::mem_handle> multifrontal_U_row_buffers_;
+ std::list<viennacl::backend::mem_handle> multifrontal_U_col_buffers_;
+ std::list<viennacl::backend::mem_handle> multifrontal_U_element_buffers_;
+ std::list<vcl_size_t> multifrontal_U_row_elimination_num_list_;
+
+};
+
+} // namespace linalg
+} // namespace viennacl
+
+
+#endif
+
+
+
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp
new file mode 100644
index 0000000..11ab842
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp
@@ -0,0 +1,597 @@
+#ifndef VIENNACL_LINALG_DETAIL_ILUT_HPP_
+#define VIENNACL_LINALG_DETAIL_ILUT_HPP_
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/detail/ilu/ilut.hpp
+ @brief Implementations of an incomplete factorization preconditioner with threshold (ILUT)
+*/
+
+#include <vector>
+#include <cmath>
+#include <iostream>
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+
+#include "viennacl/linalg/detail/ilu/common.hpp"
+#include "viennacl/compressed_matrix.hpp"
+
+#include "viennacl/linalg/host_based/common.hpp"
+
+#include <map>
+
+namespace viennacl
+{
+namespace linalg
+{
+
+/** @brief A tag for incomplete LU factorization with threshold (ILUT)
+*/
+class ilut_tag
+{
+ public:
+ /** @brief The constructor.
+ *
+ * @param entries_per_row Number of nonzero entries per row in L and U. Note that L and U are stored in a single matrix, thus there are 2*entries_per_row in total.
+ * @param drop_tolerance The drop tolerance for ILUT
+ * @param with_level_scheduling Flag for enabling level scheduling on GPUs.
+ */
+ ilut_tag(unsigned int entries_per_row = 20,
+ double drop_tolerance = 1e-4,
+ bool with_level_scheduling = false)
+ : entries_per_row_(entries_per_row),
+ drop_tolerance_(drop_tolerance),
+ use_level_scheduling_(with_level_scheduling) {}
+
+ void set_drop_tolerance(double tol)
+ {
+ if (tol > 0)
+ drop_tolerance_ = tol;
+ }
+ double get_drop_tolerance() const { return drop_tolerance_; }
+
+ void set_entries_per_row(unsigned int e)
+ {
+ if (e > 0)
+ entries_per_row_ = e;
+ }
+
+ unsigned int get_entries_per_row() const { return entries_per_row_; }
+
+ bool use_level_scheduling() const { return use_level_scheduling_; }
+ void use_level_scheduling(bool b) { use_level_scheduling_ = b; }
+
+ private:
+ unsigned int entries_per_row_;
+ double drop_tolerance_;
+ bool use_level_scheduling_;
+};
+
+
+namespace detail
+{
+ /** @brief Helper struct for holding a sparse vector in linear memory. For internal use only.
+ *
+ * Unfortunately, the 'naive' implementation using a std::map<> is almost always too slow.
+ *
+ */
+ template<typename NumericT>
+ struct ilut_sparse_vector
+ {
+ ilut_sparse_vector(vcl_size_t alloc_size = 0) : size_(0), col_indices_(alloc_size), elements_(alloc_size) {}
+
+ void resize_if_bigger(vcl_size_t s)
+ {
+ if (s > elements_.size())
+ {
+ col_indices_.resize(s);
+ elements_.resize(s);
+ }
+ size_ = s;
+ }
+
+ vcl_size_t size_;
+ std::vector<unsigned int> col_indices_;
+ std::vector<NumericT> elements_;
+ };
+
+ /** @brief Subtracts a scaled sparse vector u from a sparse vector w and writes the output to z: z = w - alpha * u
+ *
+ * Sparsity pattern of u and w are usually different.
+ *
+ * @return Length of new vector
+ */
+ template<typename IndexT, typename NumericT>
+ IndexT merge_subtract_sparse_rows(IndexT const * w_coords, NumericT const * w_elements, IndexT w_size,
+ IndexT const * u_coords, NumericT const * u_elements, IndexT u_size, NumericT alpha,
+ IndexT * z_coords, NumericT * z_elements)
+ {
+ IndexT index_w = 0;
+ IndexT index_u = 0;
+ IndexT index_z = 0;
+
+ while (1)
+ {
+ if (index_w < w_size && index_u < u_size)
+ {
+ if (w_coords[index_w] < u_coords[index_u])
+ {
+ z_coords[index_z] = w_coords[index_w];
+ z_elements[index_z++] = w_elements[index_w++];
+ }
+ else if (w_coords[index_w] == u_coords[index_u])
+ {
+ z_coords[index_z] = w_coords[index_w];
+ z_elements[index_z++] = w_elements[index_w++] - alpha * u_elements[index_u++];
+ }
+ else
+ {
+ z_coords[index_z] = u_coords[index_u];
+ z_elements[index_z++] = - alpha * u_elements[index_u++];
+ }
+ }
+ else if (index_w == w_size && index_u < u_size)
+ {
+ z_coords[index_z] = u_coords[index_u];
+ z_elements[index_z++] = - alpha * u_elements[index_u++];
+ }
+ else if (index_w < w_size && index_u == u_size)
+ {
+ z_coords[index_z] = w_coords[index_w];
+ z_elements[index_z++] = w_elements[index_w++];
+ }
+ else
+ return index_z;
+ }
+ }
+
+ template<typename SizeT, typename NumericT>
+ void insert_with_value_sort(std::vector<std::pair<SizeT, NumericT> > & map,
+ SizeT index, NumericT value)
+ {
+ NumericT abs_value = std::fabs(value);
+ if (abs_value > 0)
+ {
+ // find first element with smaller absolute value:
+ std::size_t first_smaller_index = 0;
+ while (first_smaller_index < map.size() && std::fabs(map[first_smaller_index].second) > abs_value)
+ ++first_smaller_index;
+
+ std::pair<SizeT, NumericT> tmp(index, value);
+ for (std::size_t j=first_smaller_index; j<map.size(); ++j)
+ std::swap(map[j], tmp);
+ }
+ }
+
+}
+
+/** @brief Implementation of a ILU-preconditioner with threshold. Optimized implementation for compressed_matrix.
+*
+* refer to Algorithm 10.6 by Saad's book (1996 edition)
+*
+* @param A The input matrix. Either a compressed_matrix or of type std::vector< std::map<T, U> >
+* @param L The output matrix for L.
+* @param U The output matrix for U.
+* @param tag An ilut_tag in order to dispatch among several other preconditioners.
+*/
+template<typename NumericT>
+void precondition(viennacl::compressed_matrix<NumericT> const & A,
+ viennacl::compressed_matrix<NumericT> & L,
+ viennacl::compressed_matrix<NumericT> & U,
+ ilut_tag const & tag)
+{
+ assert(A.size1() == L.size1() && bool("Output matrix size mismatch") );
+ assert(A.size1() == U.size1() && bool("Output matrix size mismatch") );
+
+ L.reserve( tag.get_entries_per_row() * A.size1());
+ U.reserve((tag.get_entries_per_row() + 1) * A.size1());
+
+ vcl_size_t avg_nnz_per_row = static_cast<vcl_size_t>(A.nnz() / A.size1());
+ detail::ilut_sparse_vector<NumericT> w1(tag.get_entries_per_row() * (avg_nnz_per_row + 10));
+ detail::ilut_sparse_vector<NumericT> w2(tag.get_entries_per_row() * (avg_nnz_per_row + 10));
+ detail::ilut_sparse_vector<NumericT> * w_in = &w1;
+ detail::ilut_sparse_vector<NumericT> * w_out = &w2;
+ std::vector<NumericT> diagonal_U(A.size1());
+
+ NumericT const * elements_A = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
+ unsigned int const * row_buffer_A = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
+ unsigned int const * col_buffer_A = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
+
+ NumericT * elements_L = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.handle());
+ unsigned int * row_buffer_L = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.handle1()); row_buffer_L[0] = 0;
+ unsigned int * col_buffer_L = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.handle2());
+
+ NumericT * elements_U = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.handle());
+ unsigned int * row_buffer_U = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.handle1()); row_buffer_U[0] = 0;
+ unsigned int * col_buffer_U = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.handle2());
+
+ std::vector<std::pair<unsigned int, NumericT> > sorted_entries_L(tag.get_entries_per_row());
+ std::vector<std::pair<unsigned int, NumericT> > sorted_entries_U(tag.get_entries_per_row());
+
+ for (vcl_size_t i=0; i<viennacl::traits::size1(A); ++i) // Line 1
+ {
+ std::fill(sorted_entries_L.begin(), sorted_entries_L.end(), std::pair<unsigned int, NumericT>(0, NumericT(0)));
+ std::fill(sorted_entries_U.begin(), sorted_entries_U.end(), std::pair<unsigned int, NumericT>(0, NumericT(0)));
+
+ //line 2: set up w
+ w_in->resize_if_bigger(row_buffer_A[i+1] - row_buffer_A[i]);
+ NumericT row_norm = 0;
+ unsigned int k = 0;
+ for (unsigned int j = row_buffer_A[i]; j < row_buffer_A[i+1]; ++j, ++k)
+ {
+ w_in->col_indices_[k] = col_buffer_A[j];
+ NumericT entry = elements_A[j];
+ w_in->elements_[k] = entry;
+ row_norm += entry * entry;
+ }
+ row_norm = std::sqrt(row_norm);
+ NumericT tau_i = static_cast<NumericT>(tag.get_drop_tolerance()) * row_norm;
+
+ //line 3: Iterate over lower diagonal parts of A:
+ k = 0;
+ unsigned int current_col = (row_buffer_A[i+1] > row_buffer_A[i]) ? w_in->col_indices_[k] : static_cast<unsigned int>(i); // mind empty rows here!
+ while (current_col < i)
+ {
+ //line 4:
+ NumericT a_kk = diagonal_U[current_col];
+
+ NumericT w_k_entry = w_in->elements_[k] / a_kk;
+ w_in->elements_[k] = w_k_entry;
+
+ //lines 5,6: (dropping rule to w_k)
+ if ( std::fabs(w_k_entry) > tau_i)
+ {
+ //line 7:
+ unsigned int row_U_begin = row_buffer_U[current_col];
+ unsigned int row_U_end = row_buffer_U[current_col + 1];
+
+ if (row_U_end > row_U_begin)
+ {
+ w_out->resize_if_bigger(w_in->size_ + (row_U_end - row_U_begin) - 1);
+ w_out->size_ = detail::merge_subtract_sparse_rows(&(w_in->col_indices_[0]), &(w_in->elements_[0]), static_cast<unsigned int>(w_in->size_),
+ col_buffer_U + row_U_begin + 1, elements_U + row_U_begin + 1, (row_U_end - row_U_begin) - 1, w_k_entry,
+ &(w_out->col_indices_[0]), &(w_out->elements_[0])
+ );
+ ++k;
+ }
+ }
+ else // drop element
+ {
+ w_out->resize_if_bigger(w_in->size_ - 1);
+ for (unsigned int r = 0; r < k; ++r)
+ {
+ w_out->col_indices_[r] = w_in->col_indices_[r];
+ w_out->elements_[r] = w_in->elements_[r];
+ }
+ for (unsigned int r = k+1; r < w_in->size_; ++r)
+ {
+ w_out->col_indices_[r-1] = w_in->col_indices_[r];
+ w_out->elements_[r-1] = w_in->elements_[r];
+ }
+
+ // Note: No increment to k here, element was dropped!
+ }
+
+ // swap pointers to w1 and w2
+ std::swap(w_in, w_out);
+
+ // process next entry:
+ current_col = (k < w_in->size_) ? w_in->col_indices_[k] : static_cast<unsigned int>(i);
+ } // while()
+
+ // Line 10: Apply a dropping rule to w
+ // To do so, we write values to a temporary array
+ for (unsigned int r = 0; r < w_in->size_; ++r)
+ {
+ unsigned int col = w_in->col_indices_[r];
+ NumericT value = w_in->elements_[r];
+
+ if (col < i) // entry for L:
+ detail::insert_with_value_sort(sorted_entries_L, col, value);
+ else if (col == i) // do not drop diagonal element
+ {
+ diagonal_U[i] = value;
+ if (value <= 0 && value >= 0)
+ {
+ std::cerr << "ViennaCL: FATAL ERROR in ILUT(): Diagonal entry computed to zero (" << value << ") in row " << i << "!" << std::endl;
+ throw zero_on_diagonal_exception("ILUT zero diagonal!");
+ }
+ }
+ else // entry for U:
+ detail::insert_with_value_sort(sorted_entries_U, col, value);
+ }
+
+ //Lines 10-12: Apply a dropping rule to w, write the largest p values to L and U
+ unsigned int offset_L = row_buffer_L[i];
+ std::sort(sorted_entries_L.begin(), sorted_entries_L.end());
+ for (unsigned int j=0; j<tag.get_entries_per_row(); ++j)
+ if (std::fabs(sorted_entries_L[j].second) > 0)
+ {
+ col_buffer_L[offset_L] = sorted_entries_L[j].first;
+ elements_L[offset_L] = sorted_entries_L[j].second;
+ ++offset_L;
+ }
+ row_buffer_L[i+1] = offset_L;
+
+ unsigned int offset_U = row_buffer_U[i];
+ col_buffer_U[offset_U] = static_cast<unsigned int>(i);
+ elements_U[offset_U] = diagonal_U[i];
+ ++offset_U;
+ std::sort(sorted_entries_U.begin(), sorted_entries_U.end());
+ for (unsigned int j=0; j<tag.get_entries_per_row(); ++j)
+ if (std::fabs(sorted_entries_U[j].second) > 0)
+ {
+ col_buffer_U[offset_U] = sorted_entries_U[j].first;
+ elements_U[offset_U] = sorted_entries_U[j].second;
+ ++offset_U;
+ }
+ row_buffer_U[i+1] = offset_U;
+
+ } //for i
+}
+
+
+/** @brief ILUT preconditioner class, can be supplied to solve()-routines
+*/
+template<typename MatrixT>
+class ilut_precond
+{
+ typedef typename MatrixT::value_type NumericType;
+
+public:
+ ilut_precond(MatrixT const & mat, ilut_tag const & tag) : tag_(tag), L_(mat.size1(), mat.size2()), U_(mat.size1(), mat.size2())
+ {
+ //initialize preconditioner:
+ //std::cout << "Start CPU precond" << std::endl;
+ init(mat);
+ //std::cout << "End CPU precond" << std::endl;
+ }
+
+ template<typename VectorT>
+ void apply(VectorT & vec) const
+ {
+ //Note: Since vec can be a rather arbitrary vector type, we call the more generic version in the backend manually:
+ {
+ unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_.handle1());
+ unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_.handle2());
+ NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(L_.handle());
+
+ viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, L_.size2(), unit_lower_tag());
+ }
+ {
+ unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_.handle1());
+ unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_.handle2());
+ NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(U_.handle());
+
+ viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, U_.size2(), upper_tag());
+ }
+ }
+
+private:
+ void init(MatrixT const & mat)
+ {
+ viennacl::context host_context(viennacl::MAIN_MEMORY);
+ viennacl::compressed_matrix<NumericType> temp;
+ viennacl::switch_memory_context(temp, host_context);
+ viennacl::switch_memory_context(L_, host_context);
+ viennacl::switch_memory_context(U_, host_context);
+
+ viennacl::copy(mat, temp);
+
+ viennacl::linalg::precondition(temp, L_, U_, tag_);
+ }
+
+ ilut_tag tag_;
+ viennacl::compressed_matrix<NumericType> L_;
+ viennacl::compressed_matrix<NumericType> U_;
+};
+
+
+/** @brief ILUT preconditioner class, can be supplied to solve()-routines.
+*
+* Specialization for compressed_matrix
+*/
+template<typename NumericT, unsigned int AlignmentV>
+class ilut_precond< viennacl::compressed_matrix<NumericT, AlignmentV> >
+{
+typedef viennacl::compressed_matrix<NumericT, AlignmentV> MatrixType;
+
+public:
+ ilut_precond(MatrixType const & mat, ilut_tag const & tag)
+ : tag_(tag),
+ L_(mat.size1(), mat.size2(), viennacl::traits::context(mat)),
+ U_(mat.size1(), mat.size2(), viennacl::traits::context(mat))
+ {
+ //initialize preconditioner:
+ //std::cout << "Start GPU precond" << std::endl;
+ init(mat);
+ //std::cout << "End GPU precond" << std::endl;
+ }
+
+ void apply(viennacl::vector<NumericT> & vec) const
+ {
+ if (vec.handle().get_active_handle_id() != viennacl::MAIN_MEMORY)
+ {
+ if (tag_.use_level_scheduling())
+ {
+ //std::cout << "Using multifrontal on GPU..." << std::endl;
+ detail::level_scheduling_substitute(vec,
+ multifrontal_L_row_index_arrays_,
+ multifrontal_L_row_buffers_,
+ multifrontal_L_col_buffers_,
+ multifrontal_L_element_buffers_,
+ multifrontal_L_row_elimination_num_list_);
+
+ vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_);
+
+ detail::level_scheduling_substitute(vec,
+ multifrontal_U_row_index_arrays_,
+ multifrontal_U_row_buffers_,
+ multifrontal_U_col_buffers_,
+ multifrontal_U_element_buffers_,
+ multifrontal_U_row_elimination_num_list_);
+
+ }
+ else
+ {
+ viennacl::context host_context(viennacl::MAIN_MEMORY);
+ viennacl::context old_context = viennacl::traits::context(vec);
+ viennacl::switch_memory_context(vec, host_context);
+ viennacl::linalg::inplace_solve(L_, vec, unit_lower_tag());
+ viennacl::linalg::inplace_solve(U_, vec, upper_tag());
+ viennacl::switch_memory_context(vec, old_context);
+ }
+ }
+ else //apply ILUT directly:
+ {
+ viennacl::linalg::inplace_solve(L_, vec, unit_lower_tag());
+ viennacl::linalg::inplace_solve(U_, vec, upper_tag());
+ }
+ }
+
+private:
+ void init(MatrixType const & mat)
+ {
+ viennacl::context host_context(viennacl::MAIN_MEMORY);
+ viennacl::switch_memory_context(L_, host_context);
+ viennacl::switch_memory_context(U_, host_context);
+
+ if (viennacl::traits::context(mat).memory_type() == viennacl::MAIN_MEMORY)
+ {
+ viennacl::linalg::precondition(mat, L_, U_, tag_);
+ }
+ else //we need to copy to CPU
+ {
+ viennacl::compressed_matrix<NumericT> cpu_mat(mat.size1(), mat.size2(), viennacl::traits::context(mat));
+ viennacl::switch_memory_context(cpu_mat, host_context);
+
+ cpu_mat = mat;
+
+ viennacl::linalg::precondition(cpu_mat, L_, U_, tag_);
+ }
+
+ if (!tag_.use_level_scheduling())
+ return;
+
+ //
+ // multifrontal part:
+ //
+
+ viennacl::switch_memory_context(multifrontal_U_diagonal_, host_context);
+ multifrontal_U_diagonal_.resize(U_.size1(), false);
+ host_based::detail::row_info(U_, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL);
+
+ detail::level_scheduling_setup_L(L_,
+ multifrontal_U_diagonal_, //dummy
+ multifrontal_L_row_index_arrays_,
+ multifrontal_L_row_buffers_,
+ multifrontal_L_col_buffers_,
+ multifrontal_L_element_buffers_,
+ multifrontal_L_row_elimination_num_list_);
+
+
+ detail::level_scheduling_setup_U(U_,
+ multifrontal_U_diagonal_,
+ multifrontal_U_row_index_arrays_,
+ multifrontal_U_row_buffers_,
+ multifrontal_U_col_buffers_,
+ multifrontal_U_element_buffers_,
+ multifrontal_U_row_elimination_num_list_);
+
+ //
+ // Bring to device if necessary:
+ //
+
+ // L:
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin();
+ it != multifrontal_L_row_index_arrays_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin();
+ it != multifrontal_L_row_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin();
+ it != multifrontal_L_col_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin();
+ it != multifrontal_L_element_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat));
+
+
+ // U:
+
+ viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin();
+ it != multifrontal_U_row_index_arrays_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin();
+ it != multifrontal_U_row_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin();
+ it != multifrontal_U_col_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
+
+ for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin();
+ it != multifrontal_U_element_buffers_.end();
+ ++it)
+ viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat));
+
+
+ }
+
+ ilut_tag tag_;
+ viennacl::compressed_matrix<NumericT> L_;
+ viennacl::compressed_matrix<NumericT> U_;
+
+ std::list<viennacl::backend::mem_handle> multifrontal_L_row_index_arrays_;
+ std::list<viennacl::backend::mem_handle> multifrontal_L_row_buffers_;
+ std::list<viennacl::backend::mem_handle> multifrontal_L_col_buffers_;
+ std::list<viennacl::backend::mem_handle> multifrontal_L_element_buffers_;
+ std::list<vcl_size_t > multifrontal_L_row_elimination_num_list_;
+
+ viennacl::vector<NumericT> multifrontal_U_diagonal_;
+ std::list<viennacl::backend::mem_handle> multifrontal_U_row_index_arrays_;
+ std::list<viennacl::backend::mem_handle> multifrontal_U_row_buffers_;
+ std::list<viennacl::backend::mem_handle> multifrontal_U_col_buffers_;
+ std::list<viennacl::backend::mem_handle> multifrontal_U_element_buffers_;
+ std::list<vcl_size_t > multifrontal_U_row_elimination_num_list_;
+};
+
+} // namespace linalg
+} // namespace viennacl
+
+
+
+
+#endif
+
+
+
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp
new file mode 100644
index 0000000..0e2abb0
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp
@@ -0,0 +1,103 @@
+#ifndef VIENNACL_LINALG_DETAIL_OP_APPLIER_HPP
+#define VIENNACL_LINALG_DETAIL_OP_APPLIER_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/detail/op_applier.hpp
+ *
+ * @brief Defines the action of certain unary and binary operators and its arguments (for host execution).
+*/
+
+#include "viennacl/forwards.h"
+#include <cmath>
+
+namespace viennacl
+{
+namespace linalg
+{
+namespace detail
+{
+
+/** @brief Worker class for decomposing expression templates.
+ *
+ * @tparam A Type to which is assigned to
+ * @tparam OP One out of {op_assign, op_inplace_add, op_inplace_sub}
+ @ @tparam T Right hand side of the assignment
+*/
+template<typename OpT>
+struct op_applier
+{
+ typedef typename OpT::ERROR_UNKNOWN_OP_TAG_PROVIDED error_type;
+};
+
+/** \cond */
+template<>
+struct op_applier<op_element_binary<op_prod> >
+{
+ template<typename T>
+ static void apply(T & result, T const & x, T const & y) { result = x * y; }
+};
+
+template<>
+struct op_applier<op_element_binary<op_div> >
+{
+ template<typename T>
+ static void apply(T & result, T const & x, T const & y) { result = x / y; }
+};
+
+template<>
+struct op_applier<op_element_binary<op_pow> >
+{
+ template<typename T>
+ static void apply(T & result, T const & x, T const & y) { result = std::pow(x, y); }
+};
+
+#define VIENNACL_MAKE_UNARY_OP_APPLIER(funcname) \
+template<> \
+struct op_applier<op_element_unary<op_##funcname> > \
+{ \
+ template<typename T> \
+ static void apply(T & result, T const & x) { using namespace std; result = funcname(x); } \
+}
+
+VIENNACL_MAKE_UNARY_OP_APPLIER(abs);
+VIENNACL_MAKE_UNARY_OP_APPLIER(acos);
+VIENNACL_MAKE_UNARY_OP_APPLIER(asin);
+VIENNACL_MAKE_UNARY_OP_APPLIER(atan);
+VIENNACL_MAKE_UNARY_OP_APPLIER(ceil);
+VIENNACL_MAKE_UNARY_OP_APPLIER(cos);
+VIENNACL_MAKE_UNARY_OP_APPLIER(cosh);
+VIENNACL_MAKE_UNARY_OP_APPLIER(exp);
+VIENNACL_MAKE_UNARY_OP_APPLIER(fabs);
+VIENNACL_MAKE_UNARY_OP_APPLIER(floor);
+VIENNACL_MAKE_UNARY_OP_APPLIER(log);
+VIENNACL_MAKE_UNARY_OP_APPLIER(log10);
+VIENNACL_MAKE_UNARY_OP_APPLIER(sin);
+VIENNACL_MAKE_UNARY_OP_APPLIER(sinh);
+VIENNACL_MAKE_UNARY_OP_APPLIER(sqrt);
+VIENNACL_MAKE_UNARY_OP_APPLIER(tan);
+VIENNACL_MAKE_UNARY_OP_APPLIER(tanh);
+
+#undef VIENNACL_MAKE_UNARY_OP_APPLIER
+/** \endcond */
+
+}
+}
+}
+
+#endif // VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp
new file mode 100644
index 0000000..bd49b3b
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp
@@ -0,0 +1,86 @@
+#ifndef VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP
+#define VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/detail/op_executor.hpp
+ *
+ * @brief Defines the worker class for decomposing an expression tree into small chunks, which can be processed by the predefined operations in ViennaCL.
+*/
+
+#include "viennacl/forwards.h"
+
+namespace viennacl
+{
+namespace linalg
+{
+namespace detail
+{
+
+template<typename NumericT, typename B>
+bool op_aliasing(vector_base<NumericT> const & /*lhs*/, B const & /*b*/)
+{
+ return false;
+}
+
+template<typename NumericT>
+bool op_aliasing(vector_base<NumericT> const & lhs, vector_base<NumericT> const & b)
+{
+ return lhs.handle() == b.handle();
+}
+
+template<typename NumericT, typename LhsT, typename RhsT, typename OpT>
+bool op_aliasing(vector_base<NumericT> const & lhs, vector_expression<const LhsT, const RhsT, OpT> const & rhs)
+{
+ return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs());
+}
+
+
+template<typename NumericT, typename B>
+bool op_aliasing(matrix_base<NumericT> const & /*lhs*/, B const & /*b*/)
+{
+ return false;
+}
+
+template<typename NumericT>
+bool op_aliasing(matrix_base<NumericT> const & lhs, matrix_base<NumericT> const & b)
+{
+ return lhs.handle() == b.handle();
+}
+
+template<typename NumericT, typename LhsT, typename RhsT, typename OpT>
+bool op_aliasing(matrix_base<NumericT> const & lhs, matrix_expression<const LhsT, const RhsT, OpT> const & rhs)
+{
+ return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs());
+}
+
+
+/** @brief Worker class for decomposing expression templates.
+ *
+ * @tparam A Type to which is assigned to
+ * @tparam OP One out of {op_assign, op_inplace_add, op_inplace_sub}
+ @ @tparam T Right hand side of the assignment
+*/
+template<typename A, typename OP, typename T>
+struct op_executor {};
+
+}
+}
+}
+
+#endif // VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp
new file mode 100644
index 0000000..12ff77b
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp
@@ -0,0 +1,86 @@
+#ifndef VIENNACL_LINALG_DETAIL_SPAI_BLOCK_MATRIX_HPP
+#define VIENNACL_LINALG_DETAIL_SPAI_BLOCK_MATRIX_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include <utility>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <algorithm>
+#include <vector>
+#include "viennacl/ocl/backend.hpp"
+#include "viennacl/tools/tools.hpp"
+
+/** @file viennacl/linalg/detail/spai/block_matrix.hpp
+ @brief Implementation of a bunch of (small) matrices on GPU. Experimental.
+
+ SPAI code contributed by Nikolay Lukash
+*/
+
+namespace viennacl
+{
+namespace linalg
+{
+namespace detail
+{
+namespace spai
+{
+
+/**
+* @brief Represents contigious matrices on GPU
+*/
+
+class block_matrix
+{
+public:
+
+ ////////// non-const
+
+ /** @brief Returns a handle to the elements */
+ viennacl::ocl::handle<cl_mem>& handle(){ return elements_; }
+
+ /** @brief Returns a handle to the matrix dimensions */
+ viennacl::ocl::handle<cl_mem>& handle1() { return matrix_dimensions_; }
+
+ /** @brief Returns a handle to the start indices of matrix */
+ viennacl::ocl::handle<cl_mem>& handle2() { return start_block_inds_; }
+
+ ////////// const
+
+ /** @brief Returns a handle to the const elements */
+ const viennacl::ocl::handle<cl_mem>& handle() const { return elements_; }
+
+ /** @brief Returns a handle to the const matrix dimensions */
+ const viennacl::ocl::handle<cl_mem>& handle1() const { return matrix_dimensions_; }
+
+ /** @brief Returns a handle to the const start indices of matrix */
+ const viennacl::ocl::handle<cl_mem>& handle2() const { return start_block_inds_; }
+
+private:
+ viennacl::ocl::handle<cl_mem> elements_;
+ viennacl::ocl::handle<cl_mem> matrix_dimensions_;
+ viennacl::ocl::handle<cl_mem> start_block_inds_;
+};
+
+
+}
+}
+}
+}
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp
new file mode 100644
index 0000000..eee6aef
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp
@@ -0,0 +1,77 @@
+#ifndef VIENNACL_LINALG_DETAIL_SPAI_BLOCK_VECTOR_HPP
+#define VIENNACL_LINALG_DETAIL_SPAI_BLOCK_VECTOR_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include <utility>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <algorithm>
+#include <vector>
+#include "viennacl/ocl/backend.hpp"
+#include "viennacl/tools/tools.hpp"
+
+/** @file viennacl/linalg/detail/spai/block_vector.hpp
+ @brief Implementation of a bunch of vectors on GPU. Experimental.
+
+ SPAI code contributed by Nikolay Lukash
+*/
+
+namespace viennacl
+{
+namespace linalg
+{
+namespace detail
+{
+namespace spai
+{
+
+/**
+* @brief Represents a contiguous vector on the GPU to represent a concatentation of small vectors
+*/
+class block_vector
+{
+public:
+
+ ///////////// non-const
+
+ /** @brief Return handle to the elements */
+ viennacl::ocl::handle<cl_mem> & handle(){ return elements_; }
+
+ /** @brief Return handle to start indices */
+ viennacl::ocl::handle<cl_mem> & handle1() { return start_block_inds_; }
+
+ ///////////// const
+
+ /** @brief Return handle to the const elements */
+ const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }
+
+ /** @brief Return handle to const start indices */
+ const viennacl::ocl::handle<cl_mem> & handle1() const { return start_block_inds_; }
+
+private:
+ viennacl::ocl::handle<cl_mem> elements_;
+ viennacl::ocl::handle<cl_mem> start_block_inds_;
+};
+
+}
+}
+}
+}
+#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp
new file mode 100644
index 0000000..fab81d7
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp
@@ -0,0 +1,402 @@
+#ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
+#define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head: Karl Rupp rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License: MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+#include <utility>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <algorithm>
+#include <vector>
+#include <math.h>
+#include <map>
+
+//boost includes
+#include "boost/numeric/ublas/vector.hpp"
+#include "boost/numeric/ublas/matrix.hpp"
+#include "boost/numeric/ublas/matrix_proxy.hpp"
+#include "boost/numeric/ublas/vector_proxy.hpp"
+#include "boost/numeric/ublas/storage.hpp"
+#include "boost/numeric/ublas/io.hpp"
+#include "boost/numeric/ublas/lu.hpp"
+#include "boost/numeric/ublas/triangular.hpp"
+#include "boost/numeric/ublas/matrix_expression.hpp"
+
+// ViennaCL includes
+#include "viennacl/linalg/prod.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/compressed_matrix.hpp"
+#include "viennacl/linalg/sparse_matrix_operations.hpp"
+#include "viennacl/linalg/matrix_operations.hpp"
+#include "viennacl/scalar.hpp"
+#include "viennacl/linalg/cg.hpp"
+#include "viennacl/linalg/inner_prod.hpp"
+#include "viennacl/linalg/ilu.hpp"
+//#include <omp.h>
+
+/** @file viennacl/linalg/detail/spai/fspai.hpp
+ @brief Implementation of FSPAI. Experimental.
+*/
+
+namespace viennacl
+{
+namespace linalg
+{
+namespace detail
+{
+namespace spai
+{
+
+/** @brief A tag for FSPAI. Experimental.
+*
+* Contains values for the algorithm.
+* Must be passed to spai_precond constructor
+*/
+class fspai_tag
+{
+public:
+ /** @brief Constructor
+ *
+ * @param residual_norm_threshold Calculate until the norm of the residual falls below this threshold
+ * @param iteration_limit maximum number of iterations
+ * @param is_static determines if static version of SPAI should be used
+ * @param is_right determines if left or right preconditioner should be used
+ */
+ fspai_tag(
+ double residual_norm_threshold = 1e-3,
+ unsigned int iteration_limit = 5,
+ bool is_static = false,
+ bool is_right = false)
+ : residual_norm_threshold_(residual_norm_threshold),
+ iteration_limit_(iteration_limit),
+ is_static_(is_static),
+ is_right_(is_right) {}
+
+ inline double getResidualNormThreshold() const { return residual_norm_threshold_; }
+ inline unsigned long getIterationLimit () const { return iteration_limit_; }
+ inline bool getIsStatic() const { return is_static_; }
+ inline bool getIsRight() const { return is_right_; }
+ inline void setResidualNormThreshold(double residual_norm_threshold)
+ {
+ if (residual_norm_threshold > 0)
+ residual_norm_threshold_ = residual_norm_threshold;
+ }
+ inline void setIterationLimit(unsigned long iteration_limit)
+ {
+ if (iteration_limit > 0)
+ iteration_limit_ = iteration_limit;
+ }
+ inline void setIsRight(bool is_right) { is_right_ = is_right; }
+ inline void setIsStatic(bool is_static) { is_static_ = is_static; }
+
+private:
+ double residual_norm_threshold_;
+ unsigned long iteration_limit_;
+ bool is_static_;
+ bool is_right_;
+};
+
+
+//
+// Helper: Store A in an STL container of type, exploiting symmetry
+// Reason: ublas interface does not allow to iterate over nonzeros of a particular row without starting an iterator1 from the very beginning of the matrix...
+//
+template<typename MatrixT, typename NumericT>
+void sym_sparse_matrix_to_stl(MatrixT const & A, std::vector<std::map<unsigned int, NumericT> > & STL_A)
+{
+ STL_A.resize(A.size1());
+ for (typename MatrixT::const_iterator1 row_it = A.begin1();
+ row_it != A.end1();
+ ++row_it)
+ {
+ for (typename MatrixT::const_iterator2 col_it = row_it.begin();
+ col_it != row_it.end();
+ ++col_it)
+ {
+ if (col_it.index1() >= col_it.index2())
+ STL_A[col_it.index1()][static_cast<unsigned int>(col_it.index2())] = *col_it;
+ else
+ break; //go to next row
+ }
+ }
+}
+
+
+//
+// Generate index sets J_k, k=0,...,N-1
+//
+template<typename MatrixT>
+void generateJ(MatrixT const & A, std::vector<std::vector<vcl_size_t> > & J)
+{
+ for (typename MatrixT::const_iterator1 row_it = A.begin1();
+ row_it != A.end1();
+ ++row_it)
+ {
+ for (typename MatrixT::const_iterator2 col_it = row_it.begin();
+ col_it != row_it.end();
+ ++col_it)
+ {
+ if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only work on lower triangular part
+ {
+ J[col_it.index2()].push_back(col_it.index1());
+ J[col_it.index1()].push_back(col_it.index2());
+ }
+ else
+ break; //go to next row
+ }
+ }
+}
+
+
+//
+// Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A
+// Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after Cholesky-factoriation
+//
+template<typename NumericT, typename MatrixT, typename VectorT>
+void fill_blocks(std::vector< std::map<unsigned int, NumericT> > & A,
+ std::vector<MatrixT> & blocks,
+ std::vector<std::vector<vcl_size_t> > const & J,
+ std::vector<VectorT> & Y)
+{
+ for (vcl_size_t k=0; k<A.size(); ++k)
+ {
+ std::vector<vcl_size_t> const & Jk = J[k];
+ VectorT & yk = Y[k];
+ MatrixT & block_k = blocks[k];
+
+ yk.resize(Jk.size());
+ block_k.resize(Jk.size(), Jk.size());
+ block_k.clear();
+
+ for (vcl_size_t i=0; i<Jk.size(); ++i)
+ {
+ vcl_size_t row_index = Jk[i];
+ std::map<unsigned int, NumericT> & A_row = A[row_index];
+
+ //fill y_k:
+ yk[i] = A_row[static_cast<unsigned int>(k)];
+
+ for (vcl_size_t j=0; j<Jk.size(); ++j)
+ {
+ vcl_size_t col_index = Jk[j];
+ if (col_index <= row_index && A_row.find(static_cast<unsigned int>(col_index)) != A_row.end()) //block is symmetric, thus store only lower triangular part
+ block_k(i, j) = A_row[static_cast<unsigned int>(col_index)];
+ }
+ }
+ }
+}
+
+
+//
+// Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische Mathematik, vol 5, p. 58
+//
+template<typename MatrixT>
+void cholesky_decompose(MatrixT & A)
+{
+ for (vcl_size_t k=0; k<A.size2(); ++k)
+ {
+ if (A(k,k) <= 0)
+ {
+ std::cout << "k: " << k << std::endl;
+ std::cout << "A(k,k): " << A(k,k) << std::endl;
+ }
+
+ assert(A(k,k) > 0 && bool("Matrix not positive definite in Cholesky factorization."));
+
+ A(k,k) = std::sqrt(A(k,k));
+
+ for (vcl_size_t i=k+1; i<A.size1(); ++i)
+ {
+ A(i,k) /= A(k,k);
+ for (vcl_size_t j=k+1; j<=i; ++j)
+ A(i,j) -= A(i,k) * A(j,k);
+ }
+ }
+}
+
+
+//
+// Compute x in Ax = b, where A is already Cholesky factored (A = L L^T)
+//
+template<typename MatrixT, typename VectorT>
+void cholesky_solve(MatrixT const & L, VectorT & b)
+{
+ // inplace forward solve L x = b
+ for (vcl_size_t i=0; i<L.size1(); ++i)
+ {
+ for (vcl_size_t j=0; j<i; ++j)
+ b[i] -= L(i,j) * b[j];
+ b[i] /= L(i,i);
+ }
+
+ // inplace backward solve L^T x = b:
+ for (vcl_size_t i=L.size1()-1;; --i)
+ {
+ for (vcl_size_t k=i+1; k<L.size1(); ++k)
+ b[i] -= L(k,i) * b[k];
+ b[i] /= L(i,i);
+
+ if (i==0) //vcl_size_t might be unsigned, therefore manual check for equality with zero here
+ break;
+ }
+}
+
+
+
+//
+// Compute the Cholesky factor L from the sparse vectors y_k
+//
+template<typename MatrixT, typename VectorT>
+void computeL(MatrixT const & A,
+ MatrixT & L,
+ MatrixT & L_trans,
+ std::vector<VectorT> & Y,
+ std::vector<std::vector<vcl_size_t> > & J)
+{
+ typedef typename VectorT::value_type NumericType;
+ typedef std::vector<std::map<unsigned int, NumericType> > STLSparseMatrixType;
+
+ STLSparseMatrixType L_temp(A.size1());
+
+ for (vcl_size_t k=0; k<A.size1(); ++k)
+ {
+ std::vector<vcl_size_t> const & Jk = J[k];
+ VectorT const & yk = Y[k];
+
+ //compute L(k,k):
+ NumericType Lkk = A(k,k);
+ for (vcl_size_t i=0; i<Jk.size(); ++i)
+ Lkk -= A(Jk[i],k) * yk[i];
+
+ Lkk = NumericType(1) / std::sqrt(Lkk);
+ L_temp[k][static_cast<unsigned int>(k)] = Lkk;
+ L_trans(k,k) = Lkk;
+
+ //write lower diagonal entries:
+ for (vcl_size_t i=0; i<Jk.size(); ++i)
+ {
+ L_temp[Jk[i]][static_cast<unsigned int>(k)] = -Lkk * yk[i];
+ L_trans(k, Jk[i]) = -Lkk * yk[i];
+ }
+ } //for k
+
+
+ //build L from L_temp
+ for (vcl_size_t i=0; i<L_temp.size(); ++i)
+ for (typename std::map<unsigned int, NumericType>::const_iterator it = L_temp[i].begin();
+ it != L_temp[i].end();
+ ++it)
+ L(i, it->first) = it->second;
+}
+
+
+//
+// Top level FSPAI function
+//
+template<typename MatrixT>
+void computeFSPAI(MatrixT const & A,
+ MatrixT const & PatternA,
+ MatrixT & L,
+ MatrixT & L_trans,
+ fspai_tag)
+{
+ typedef typename MatrixT::value_type NumericT;
+ typedef boost::numeric::ublas::matrix<NumericT> DenseMatrixType;
+ typedef std::vector<std::map<unsigned int, NumericT> > SparseMatrixType;
+
+ //
+ // preprocessing: Store A in a STL container:
+ //
+ //std::cout << "Transferring to STL container:" << std::endl;
+ std::vector<std::vector<NumericT> > y_k(A.size1());
+ SparseMatrixType STL_A(A.size1());
+ sym_sparse_matrix_to_stl(A, STL_A);
+
+
+ //
+ // Step 1: Generate pattern indices
+ //
+ //std::cout << "computeFSPAI(): Generating pattern..." << std::endl;
+ std::vector<std::vector<vcl_size_t> > J(A.size1());
+ generateJ(PatternA, J);
+
+ //
+ // Step 2: Set up matrix blocks
+ //
+ //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl;
+ std::vector<DenseMatrixType> subblocks_A(A.size1());
+ fill_blocks(STL_A, subblocks_A, J, y_k);
+ STL_A.clear(); //not needed anymore
+
+ //
+ // Step 3: Cholesky-factor blocks
+ //
+ //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl;
+ for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
+ {
+ //std::cout << "Block before: " << subblocks_A[i] << std::endl;
+ cholesky_decompose(subblocks_A[i]);
+ //std::cout << "Block after: " << subblocks_A[i] << std::endl;
+ }
+
+
+ /*vcl_size_t num_bytes = 0;
+ for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
+ num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/
+ //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) << " MB" << std::endl;
+
+ //
+ // Step 4: Solve for y_k
+ //
+ //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl;
+ for (vcl_size_t i=0; i<y_k.size(); ++i)
+ {
+ if (subblocks_A[i].size1() > 0) //block might be empty...
+ {
+ //y_k[i].resize(subblocks_A[i].size1());
+ //std::cout << "y_k[" << i << "]: ";
+ //for (vcl_size_t j=0; j<y_k[i].size(); ++j)
+ // std::cout << y_k[i][j] << " ";
+ //std::cout << std::endl;
+ cholesky_solve(subblocks_A[i], y_k[i]);
+ }
+ }
+
+
+ //
+ // Step 5: Set up Cholesky factors L and L_trans
+ //
+ //std::cout << "computeFSPAI(): Computing L..." << std::endl;
+ L.resize(A.size1(), A.size2(), false);
+ L.reserve(A.nnz(), false);
+ L_trans.resize(A.size1(), A.size2(), false);
+ L_trans.reserve(A.nnz(), false);
+ computeL(A, L, L_trans, y_k, J);
+
+ //std::cout << "L: " << L << std::endl;
+}
+
+
+
+}
+}
+}
+}
+
+#endif