You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/18 08:15:03 UTC

[5/7] incubator-systemml git commit: [SYSTEMML-380] New CSR sparse matrix block

[SYSTEMML-380] New CSR sparse matrix block

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6aa34379
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6aa34379
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6aa34379

Branch: refs/heads/master
Commit: 6aa34379fb1ed94db740c1997d3b6afe8e335451
Parents: 34e8221
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Jan 17 23:05:08 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Jan 17 23:12:34 2016 -0800

----------------------------------------------------------------------
 .../runtime/matrix/data/SparseBlockCSR.java     | 439 +++++++++++++++++++
 1 file changed, 439 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6aa34379/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
new file mode 100644
index 0000000..1f7a890
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
@@ -0,0 +1,439 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+package org.apache.sysml.runtime.matrix.data;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.util.SortUtils;
+
+/**
+ * SparseBlock implementation that realizes a traditional 'compressed sparse row'
+ * representation, where the entire sparse block is stored as three arrays: ptr
+ * of length rlen+1 to store offsets per row, and indexes/values of length nnz
+ * to store column indexes and values of non-zero entries. This format is very
+ * memory efficient for sparse (but not ultra-sparse) matrices and provides very 
+ * good performance for common operations, partially due to lower memory bandwidth 
+ * requirements. However, this format is slow on incremental construction (because 
+ * it does not allow append/sort per row) without reshifting. Finally, the total 
+ * nnz is limited to INTEGER_MAX, whereas for SparseBlockMCSR only the nnz per 
+ * row are limited to INTEGER_MAX.  
+ * 
+ * TODO: extensions for faster incremental construction (e.g., max row)
+ * TODO more efficient fused setIndexRange impl to avoid repeated copies and updates
+ * 	
+ */
+public class SparseBlockCSR extends SparseBlock 
+{
+	private static final long serialVersionUID = 1922673868466164244L;
+
+	private int[] _ptr = null;       //row pointer array (size: rlen+1)
+	private int[] _indexes = null;   //column index array (size: >=nnz)
+	private double[] _values = null; //value array (size: >=nnz)
+	private int _size = 0;           //actual number of nnz
+	
+	public SparseBlockCSR(int rlen) {
+		this(rlen, INIT_CAPACITY);
+	}
+	
+	public SparseBlockCSR(int rlen, int capacity) {
+		_ptr = new int[rlen+1]; //ix0=0
+		_indexes = new int[capacity];
+		_values = new double[capacity];
+		_size = 0;
+	}
+	
+	/**
+	 * Copy constructor old sparse row representation. 
+	 */
+	public SparseBlockCSR(SparseRow[] rows, int nnz)
+	{
+		int rlen = rows.length;
+		
+		_ptr = new int[rlen+1]; //ix0=0
+		_indexes = new int[nnz];
+		_values = new double[nnz];
+		_size = nnz;
+		
+		for( int i=0, pos=0; i<rlen; i++ ) {
+			int alen = rows[i].size();
+			int[] aix = rows[i].getIndexContainer();
+			double[] avals = rows[i].getValueContainer();
+			for( int j=0; j<alen; j++ ) {
+				_indexes[pos] = aix[j];
+				_values[pos] = avals[j];
+				pos++;
+			}
+			_ptr[i+1]=pos;	
+		}
+	}
+	
+	@Override
+	public void allocate(int r) {
+		//do nothing everything preallocated
+	}
+
+	@Override
+	public int numRows() {
+		return _ptr.length-1;
+	}
+
+	@Override
+	public boolean isThreadSafe() {
+		return false;
+	}
+	
+	@Override
+	public long size() {
+		return _size;
+	}
+
+	@Override
+	public int size(int r) {
+		return _ptr[r+1] - _ptr[r];
+	}
+
+	@Override
+	public boolean isEmpty(int r) {
+		return (_ptr[r+1] - _ptr[r] == 0);
+	}
+	
+	@Override
+	public int[] indexes(int r) {
+		return _indexes;
+	}
+
+	@Override
+	public double[] values(int r) {
+		return _values;
+	}
+
+	@Override
+	public int pos(int r) {
+		return _ptr[r];
+	}
+
+	@Override
+	public boolean set(int r, int c, double v) {
+		int pos = pos(r);
+		int len = size(r);
+		
+		//search for existing col index
+		int index = Arrays.binarySearch(_indexes, pos, pos+len, c);
+		if( index >= 0 ) {
+			//delete/overwrite existing value (on value delete, we shift 
+			//left for (1) correct nnz maintenance, and (2) smaller size)
+			if( v == 0 ) {
+				shiftLeftAndDelete(index);
+				decrPtr(r+1);
+				return true; // nnz--
+			}
+			else { 	
+				_values[index] = v;
+				return false;
+			} 
+		}
+
+		//early abort on zero (if no overwrite)
+		if( v==0 ) return false;
+		
+		//insert new index-value pair
+		index = Math.abs( index+1 );
+		if( _size==_values.length )
+			resizeAndInsert(index, c, v);
+		else
+			shiftRightAndInsert(index, c, v);
+		incrPtr(r+1);
+		return true; // nnz++
+	}
+
+	@Override
+	public void append(int r, int c, double v) {
+		//early abort on zero 
+		if( v==0 ) return;
+	
+		int pos = pos(r);
+		int len = size(r);
+		if( pos+len == _size ) {
+			//resize and append
+			if( _size==_values.length )
+				resize();
+			insert(_size, c, v);		
+		}		
+		else {
+			//resize, shift and insert
+			if( _size==_values.length )
+				resizeAndInsert(pos+len, c, v);
+			else
+				shiftRightAndInsert(pos+len, c, v);
+		}			
+		incrPtr(r+1);
+	}
+
+	@Override
+	public void setIndexRange(int r, int cl, int cu, double[] v, int vix, int vlen) {
+		//delete existing values in range if necessary 
+		deleteIndexRange(r, cl, cu);
+		
+		//determine input nnz
+		int lnnz = 0;
+		for( int i=vix; i<vix+vlen; i++ )
+			lnnz += ( v[i] != 0 ) ? 1 : 0;
+
+		//prepare free space (allocate and shift)
+		int lsize = _size+lnnz;
+		if( _values.length < lsize )
+			resize(lsize);
+		int index = posFIndexGT(r, cl);
+		shiftRightByN((index>0)?index:pos(r+1), lnnz);
+		
+		//insert values
+		for( int i=vix; i<vix+vlen; i++ )
+			if( v[i] != 0 ) {
+				_indexes[ index ] = cl+i-vix;
+				_values[ index ] = v[i];
+				index++;
+			}
+		incrPtr(r+1, lnnz);
+	}
+
+	@Override
+	public void deleteIndexRange(int r, int cl, int cu) {
+		int start = posFIndexGTE(r,cl);
+		if( start < 0 ) //nothing to delete 
+			return;		
+
+		int len = size(r);
+		int end = posFIndexGTE(r, cu);
+		if( end < 0 ) //delete all remaining
+			end = start+len;
+		
+		//overlapping array copy (shift rhs values left)
+		System.arraycopy(_indexes, end, _indexes, start, _size-end);
+		System.arraycopy(_values, end, _values, start, _size-end);
+		_size -= (end-start);		
+		
+		decrPtr(r+1, end-start);
+	}
+
+	@Override
+	public void sort() {
+		int rlen = numRows();
+		for( int i=0; i<rlen && pos(i)<_size; i++ )
+			sort(i);
+	}
+
+	@Override
+	public void sort(int r) {
+		int pos = pos(r);
+		int len = size(r);
+				
+		if( len<=100 || !SortUtils.isSorted(pos, pos+len, _indexes) )
+			SortUtils.sortByIndex(pos, pos+len, _indexes, _values);
+	}
+
+	@Override
+	public double get(int r, int c) {
+		int pos = pos(r);
+		int len = size(r);
+		
+		//search for existing col index in [pos,pos+len)
+		int index = Arrays.binarySearch(_indexes, pos, pos+len, c);		
+		return (index >= 0) ? _values[index] : 0;
+	}
+
+	@Override
+	public int posFIndexLTE(int r, int c) {
+		int pos = pos(r);
+		int len = size(r);
+		
+		//search for existing col index in [pos,pos+len)
+		int index = Arrays.binarySearch(_indexes, pos, pos+len, c);
+		if( index >= 0  )
+			return (index < pos+len) ? index : -1;
+		
+		//search lt col index (see binary search)
+		index = Math.abs( index+1 );
+		return (index-1 >= pos) ? index-1 : -1;
+	}
+
+	@Override
+	public int posFIndexGTE(int r, int c) {
+		int pos = pos(r);
+		int len = size(r);
+		
+		//search for existing col index
+		int index = Arrays.binarySearch(_indexes, pos, pos+len, c);
+		if( index >= 0  )
+			return (index < pos+len) ? index : -1;
+		
+		//search gt col index (see binary search)
+		index = Math.abs( index+1 );
+		return (index < pos+len) ? index : -1;
+	}
+
+	@Override
+	public int posFIndexGT(int r, int c) {
+		int pos = pos(r);
+		int len = size(r);
+		
+		//search for existing col index
+		int index = Arrays.binarySearch(_indexes, pos, pos+len, c);
+		if( index >= 0  )
+			return (index+1 < pos+len) ? index+1 : -1;
+		
+		//search gt col index (see binary search)
+		index = Math.abs( index+1 );
+		return (index < pos+len) ? index : -1;
+	}
+	
+	///////////////////////////
+	// private helper methods
+	
+	/**
+	 * 
+	 */
+	private void resize() {
+		//compute new size
+		double tmpCap = _values.length * RESIZE_FACTOR1;
+		int newCap = (int)Math.min(tmpCap, Integer.MAX_VALUE);
+		
+		resize(newCap);
+	}
+	
+	private void resize(int capacity) {
+		//reallocate arrays and copy old values
+		_indexes = Arrays.copyOf(_indexes, capacity);
+		_values = Arrays.copyOf(_values, capacity);
+	}
+	
+	/**
+	 * 
+	 * @param ix
+	 * @param c
+	 * @param v
+	 */
+	private void resizeAndInsert(int ix, int c, double v) {
+		//compute new size
+		double tmpCap = _values.length * RESIZE_FACTOR1;
+		int newCap = (int)Math.min(tmpCap, Integer.MAX_VALUE);
+		
+		int[] oldindexes = _indexes;
+		double[] oldvalues = _values;
+		_indexes = new int[newCap];
+		_values = new double[newCap];
+		
+		//copy lhs values to new array
+		System.arraycopy(oldindexes, 0, _indexes, 0, ix);
+		System.arraycopy(oldvalues, 0, _values, 0, ix);
+		
+		//copy rhs values to new array
+		System.arraycopy(oldindexes, ix, _indexes, ix+1, _size-ix);
+		System.arraycopy(oldvalues, ix, _values, ix+1, _size-ix);
+		
+		//insert new value
+		insert(ix, c, v);
+	}
+	
+	/**
+	 * 
+	 * @param ix
+	 * @param c
+	 * @param v
+	 */
+	private void shiftRightAndInsert(int ix, int c, double v)  {		
+		//overlapping array copy (shift rhs values right by 1)
+		System.arraycopy(_indexes, ix, _indexes, ix+1, _size-ix);
+		System.arraycopy(_values, ix, _values, ix+1, _size-ix);
+		
+		//insert new value
+		insert(ix, c, v);
+	}
+	
+	/**
+	 * 
+	 * @param index
+	 */
+	private void shiftLeftAndDelete(int ix)
+	{
+		//overlapping array copy (shift rhs values left by 1)
+		System.arraycopy(_indexes, ix+1, _indexes, ix, _size-ix-1);
+		System.arraycopy(_values, ix+1, _values, ix, _size-ix-1);
+		_size--;
+	}
+
+	private void shiftRightByN(int ix, int n) 
+	{		
+		//overlapping array copy (shift rhs values right by 1)
+		System.arraycopy(_indexes, ix, _indexes, ix+n, _size-ix);
+		System.arraycopy(_values, ix, _values, ix+n, _size-ix);
+		_size += n;
+	}
+	
+	/**
+	 * 
+	 * @param ix
+	 * @param c
+	 * @param v
+	 */
+	private void insert(int ix, int c, double v) {
+		_indexes[ix] = c;
+		_values[ix] = v;
+		_size++;	
+	}
+	
+	/**
+	 * 
+	 * @param rl
+	 */
+	private void incrPtr(int rl) {
+		incrPtr(rl, 1);
+	}
+	
+	/**
+	 * 
+	 * @param rl
+	 * @param cnt
+	 */
+	private void incrPtr(int rl, int cnt) {
+		int rlen = numRows();
+		for( int i=rl; i<rlen+1; i++ )
+			_ptr[i]+=cnt;
+	}
+	
+	/**
+	 * 
+	 * @param rl
+	 */
+	private void decrPtr(int rl) {
+		decrPtr(rl, 1);
+	}
+	
+	/**
+	 * 
+	 * @param rl
+	 * @param cnt
+	 */
+	private void decrPtr(int rl, int cnt) {
+		int rlen = numRows();
+		for( int i=rl; i<rlen+1; i++ )
+			_ptr[i]-=cnt;
+	}
+}