You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/18 08:15:01 UTC
[3/7] incubator-systemml git commit: [SYSTEMML-378] New sparse block
abstraction
[SYSTEMML-378] New sparse block abstraction
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7c06ef60
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7c06ef60
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7c06ef60
Branch: refs/heads/master
Commit: 7c06ef6068ee4fc3633d63041e4b5174d902d516
Parents: cf7d206
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Jan 17 23:00:41 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Jan 17 23:12:18 2016 -0800
----------------------------------------------------------------------
.../sysml/runtime/matrix/data/SparseBlock.java | 350 +++++++++++++++++++
1 file changed, 350 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7c06ef60/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java
new file mode 100644
index 0000000..478e900
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+package org.apache.sysml.runtime.matrix.data;
+
+import java.io.Serializable;
+import java.util.Iterator;
+
+/**
+ * This SparseBlock is an abstraction for different sparse matrix formats.
+ * Since the design is a tradeoff between performance and generality, we
+ * restrict this abstraction to row-major sparse representations for now.
+ * All sparse matrix block operations are supposed to be implemented
+ * against this abstraction in order to enable variability/extensibility.
+ *
+ * Example sparse format that can be implemented efficiently include
+ * CSR, MCSR, and - with performance drawbacks - COO.
+ *
+ */
+public abstract class SparseBlock implements Serializable
+{
+ private static final long serialVersionUID = -5008747088111141395L;
+
+ //internal configuration parameters for all sparse blocks
+ protected static final int INIT_CAPACITY = 4; //initial array capacity
+ protected static final double RESIZE_FACTOR1 = 2; //factor until reaching est nnz
+ protected static final double RESIZE_FACTOR2 = 1.1; //factor after reaching est nnz
+
+ public enum Type {
+ MCSR,
+ CSR,
+ COO,
+ }
+
+
+ ////////////////////////
+ //basic allocation
+
+ /**
+ * Allocate the underlying data structure holding non-zero values
+ * of row r if necessary.
+ *
+ * @param r
+ */
+ public abstract void allocate(int r);
+
+ ////////////////////////
+ //obtain basic meta data
+
+ /**
+ * Get the number of rows in the sparse block.
+ *
+ * @return
+ */
+ public abstract int numRows();
+
+ /**
+ * Indicates if the underlying implementation allows thread-safe row
+ * updates if concurrent threads update disjoint rows.
+ *
+ * @return
+ */
+ public abstract boolean isThreadSafe();
+
+ /**
+ * Get the number of non-zero values in the sparse block.
+ *
+ * @return
+ */
+ public abstract long size();
+
+ /**
+ * Get the number of non-zero values in row r.
+ *
+ * @param r row index starting at 0
+ * @return
+ */
+ public abstract int size(int r);
+
+ /**
+ * Get information if row r is empty, i.e., does not contain non-zero
+ * values. Equivalent to size(r)==0. Users should do this check if
+ * it is unknown if the underlying row data structure is allocated.
+ *
+ * @param r row index starting at 0
+ * @return
+ */
+ public abstract boolean isEmpty(int r);
+
+
+ ////////////////////////
+ //obtain indexes/values/positions
+
+ /**
+ * Get the sorted array of column indexes of all non-zero entries in
+ * row r. Note that - for flexibility of the implementing format - the
+ * returned array may be larger, where the range for row r is given by
+ * [pos(r),pos(r)+size(r)).
+ *
+ * @param r row index starting at 0
+ * @return
+ */
+ public abstract int[] indexes(int r);
+
+ /**
+ * Get the array of all non-zero entries in row r, sorted by their column
+ * indexes. Note that - for flexibility of the implementing format - the
+ * returned array may be larger, where the range for row r is given by
+ * [pos(r),pos(r)+size(r)).
+ *
+ * @param r row index starting at 0
+ * @return
+ */
+ public abstract double[] values(int r);
+
+ /**
+ * Get the starting position of row r in the indexes/values arrays returned
+ * by indexes(r) and values(r).
+ *
+ * @param r row index starting at 0
+ * @return
+ */
+ public abstract int pos(int r);
+
+
+ ////////////////////////
+ //update operations
+
+ /**
+ * Set the value of a matrix cell (r,c). This might update an existing
+ * non-zero value, insert a new non-zero value, or delete a non-zero value.
+ *
+ * @param r row index starting at 0
+ * @param c column index starting at 0
+ * @param v zero or non-zero value
+ * @return
+ */
+ public abstract boolean set(int r, int c, double v);
+
+ /**
+ * Append a value to the end of the physical representation. This should
+ * only be used for operations with sequential write pattern or if followed
+ * by a sort() operation. Note that this operation does not perform any
+ * matrix cell updates.
+ *
+ * @param r row index starting at 0
+ * @param c column index starting at 0
+ * @param v zero or non-zero value
+ */
+ public abstract void append(int r, int c, double v);
+
+ /**
+ * Sets a sorted array of non-zeros values into the column range [cl,cu)
+ * in row r. The passed value array may be larger and the relevant range
+ * is given by [vix,vix+len).
+ *
+ * @param r row index starting at 0
+ * @param cl lower column index starting at 0
+ * @param cu upper column index starting at 0
+ * @param v value array
+ * @param vix start index in value array
+ * @param vlen number of relevant values
+ */
+ public abstract void setIndexRange(int r, int cl, int cu, double[] v, int vix, int vlen);
+
+ /**
+ * Deletes all non-zero values of the given column range [cl,cu) in row r.
+ *
+ * @param r row index starting at 0
+ * @param cl lower column index starting at 0
+ * @param cu upper column index starting at 0
+ */
+ public abstract void deleteIndexRange(int r, int cl, int cu);
+
+ /**
+ * Sort all non-zero value/index pairs of the sparse block by row
+ * and column index.
+ */
+ public abstract void sort();
+
+ /**
+ * Sort all non-zero value/index pairs of row r column index.
+ *
+ * @param r row index starting at 0
+ */
+ public abstract void sort(int r);
+
+
+ ////////////////////////
+ //search operations
+
+ /**
+ * Get value of matrix cell (r,c). In case of non existing values
+ * this call returns 0.
+ *
+ * @param r row index starting at 0
+ * @param c column index starting at 0
+ * @return
+ */
+ public abstract double get(int r, int c);
+
+ /**
+ * Get position of first column index lower than or equal column c
+ * in row r. The position is relative to the indexes/values arrays
+ * returned by indexes(r) and values(r). If no such value exists,
+ * this call returns -1.
+ *
+ * @param r row index starting at 0
+ * @param c column index starting at 0
+ * @return
+ */
+ public abstract int posFIndexLTE(int r, int c);
+
+ /**
+ * Get position of first column index greater than or equal column c
+ * in row r. The position is relative to the indexes/values arrays
+ * returned by indexes(r) and values(r). If no such value exists,
+ * this call returns -1.
+ *
+ * @param r
+ * @param c
+ * @return
+ */
+ public abstract int posFIndexGTE(int r, int c);
+
+ /**
+ * Get position of first column index greater than column c in row r.
+ * The position is relative to the indexes/values arrays returned by
+ * indexes(r) and values(r). If no such value exists, this call
+ * returns -1.
+ *
+ * @param r
+ * @param c
+ * @return
+ */
+ public abstract int posFIndexGT(int r, int c);
+
+
+ ////////////////////////
+ //iterators
+
+ /**
+ * Get a non-zero iterator over the entire sparse block. Note that
+ * the returned IJV object is reused across next calls and should
+ * be directly consumed or deep copied.
+ *
+ * @return
+ */
+ public Iterator<IJV> getIterator() {
+ //default generic iterator, override if necessary
+ return new SparseBlockIterator(numRows());
+ }
+
+ /**
+ * Get a non-zero iterator over the subblock [rl, ru). Note that
+ * the returned IJV object is reused across next calls and should
+ * be directly consumed or deep copied.
+ *
+ * @param rl inclusive lower row index starting at 0
+ * @param ru exclusive upper row index starting at 0
+ * @return
+ */
+ public Iterator<IJV> getIterator(int rl, int ru) {
+ //default generic iterator, override if necessary
+ return new SparseBlockIterator(rl, Math.min(ru,numRows()));
+ }
+
+
+ /**
+ * Default sparse block iterator implemented against the sparse block
+ * api in an implementation-agnostic manner.
+ *
+ */
+ private class SparseBlockIterator implements Iterator<IJV>
+ {
+ private int _rlen = 0; //row upper
+ private int _curRow = -1; //current row
+ private int _curColIx = -1; //current col index pos
+ private int[] _curIndexes = null; //current col indexes
+ private double[] _curValues = null; //current col values
+ private boolean _noNext = false; //end indicator
+ private IJV retijv = new IJV(); //reuse output tuple
+
+ protected SparseBlockIterator(int ru) {
+ _rlen = ru;
+ _curRow = 0;
+ findNextNonZeroRow();
+ }
+
+ protected SparseBlockIterator(int rl, int ru) {
+ _rlen = ru;
+ _curRow = rl;
+ findNextNonZeroRow();
+ }
+
+ @Override
+ public boolean hasNext() {
+ return !_noNext;
+ }
+
+ @Override
+ public IJV next( ) {
+ retijv.set(_curRow, _curIndexes[_curColIx], _curValues[_curColIx]);
+ if( ++_curColIx >= pos(_curRow)+size(_curRow) ) {
+ _curRow++;
+ findNextNonZeroRow();
+ }
+
+ return retijv;
+ }
+
+ @Override
+ public void remove() {
+ throw new RuntimeException("SparseBlockIterator is unsupported!");
+ }
+
+ /**
+ * Moves cursor to next non-zero value or indicates that no more
+ * values are available.
+ */
+ private void findNextNonZeroRow() {
+ while( _curRow<_rlen && isEmpty(_curRow))
+ _curRow++;
+ if(_curRow >= _rlen)
+ _noNext = true;
+ else {
+ _curColIx = pos(_curRow);
+ _curIndexes = indexes(_curRow);
+ _curValues = values(_curRow);
+ }
+ }
+ }
+}