You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by lr...@apache.org on 2015/11/19 21:46:52 UTC

[10/50] [abbrv] incubator-systemml git commit: [SYSML-221] New cbind/rbind (compiler/runtime cp/mr/sp), incl tests

[SYSML-221] New cbind/rbind (compiler/runtime cp/mr/sp), incl tests

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/1043f433
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/1043f433
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/1043f433

Branch: refs/heads/master
Commit: 1043f43380399a5aa7f8cf6bc378ee3e671bb2ba
Parents: 858c438
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Oct 29 01:40:57 2015 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Oct 29 12:28:22 2015 -0700

----------------------------------------------------------------------
 src/main/java/com/ibm/bi/dml/hops/BinaryOp.java | 146 +++++++----
 src/main/java/com/ibm/bi/dml/hops/Hop.java      |   5 +-
 .../ibm/bi/dml/hops/ParameterizedBuiltinOp.java |   4 +-
 .../hops/rewrite/RewriteConstantFolding.java    |   3 +-
 src/main/java/com/ibm/bi/dml/lops/AppendCP.java |  23 +-
 src/main/java/com/ibm/bi/dml/lops/AppendG.java  |  39 ++-
 .../com/ibm/bi/dml/lops/AppendGAlignedSP.java   |  25 +-
 src/main/java/com/ibm/bi/dml/lops/AppendM.java  |  48 ++--
 src/main/java/com/ibm/bi/dml/lops/AppendR.java  |  32 +--
 .../dml/parser/BuiltinFunctionExpression.java   |  60 +++--
 .../com/ibm/bi/dml/parser/DMLTranslator.java    |   9 +-
 .../java/com/ibm/bi/dml/parser/Expression.java  |   5 +-
 .../instructions/cp/AppendCPInstruction.java    |  74 +++---
 .../instructions/mr/AppendGInstruction.java     | 135 ++++++----
 .../instructions/mr/AppendInstruction.java      |  10 +-
 .../instructions/mr/AppendMInstruction.java     |  59 +++--
 .../instructions/mr/AppendRInstruction.java     |  17 +-
 .../spark/AppendGAlignedSPInstruction.java      |  55 ++--
 .../spark/AppendGSPInstruction.java             | 262 +++++++++----------
 .../spark/AppendMSPInstruction.java             | 127 +++++----
 .../spark/AppendRSPInstruction.java             |  47 ++--
 .../instructions/spark/BinarySPInstruction.java |  43 ++-
 .../runtime/matrix/MatrixCharacteristics.java   |  13 +-
 .../dml/runtime/matrix/data/CM_N_COVCell.java   |   2 +-
 .../bi/dml/runtime/matrix/data/MatrixBlock.java |  60 ++++-
 .../bi/dml/runtime/matrix/data/MatrixCell.java  |   2 +-
 .../bi/dml/runtime/matrix/data/MatrixValue.java |   3 +-
 .../matrix/data/OperationsOnMatrixValues.java   |   4 +-
 .../functions/append/RBindCBindMatrixTest.java  | 192 ++++++++++++++
 .../functions/append/RBindCBindMatrixTest.R     |  28 ++
 .../functions/append/RBindCBindMatrixTest.dml   |  24 ++
 .../scripts/functions/append/RBindMatrixTest.R  |  28 ++
 .../functions/append/RBindMatrixTest.dml        |  24 ++
 33 files changed, 1055 insertions(+), 553 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/hops/BinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/hops/BinaryOp.java b/src/main/java/com/ibm/bi/dml/hops/BinaryOp.java
index 945f4f0..c2c111b 100644
--- a/src/main/java/com/ibm/bi/dml/hops/BinaryOp.java
+++ b/src/main/java/com/ibm/bi/dml/hops/BinaryOp.java
@@ -153,7 +153,8 @@ public class BinaryOp extends Hop
 				constructLopsMedian(et);
 				break;
 			}
-			case APPEND: {
+			case CBIND: 
+			case RBIND: {
 				constructLopsAppend(et);
 				break;
 			}
@@ -494,6 +495,7 @@ public class BinaryOp extends Hop
 		DataType dt2 = getInput().get(1).getDataType();
 		ValueType vt1 = getInput().get(0).getValueType();
 		ValueType vt2 = getInput().get(1).getValueType();
+		boolean cbind = op==OpOp2.CBIND;
 		
 		//sanity check for input data types
 		if( !((dt1==DataType.MATRIX && dt2==DataType.MATRIX)
@@ -506,29 +508,31 @@ public class BinaryOp extends Hop
 		Lop append = null;
 		if( dt1==DataType.MATRIX && dt2==DataType.MATRIX )
 		{
+			long rlen = cbind ? getInput().get(0).getDim1() : (getInput().get(0).dimsKnown() && getInput().get(1).dimsKnown()) ?
+				getInput().get(0).getDim1()+getInput().get(1).getDim1() : -1;
+			long clen = cbind ? ((getInput().get(0).dimsKnown() && getInput().get(1).dimsKnown()) ?
+				getInput().get(0).getDim2()+getInput().get(1).getDim2() : -1) : getInput().get(0).getDim2();			
+		
 			if( et == ExecType.MR )
 			{
-				append = constructMRAppendLop(getInput().get(0), getInput().get(1), getDataType(), getValueType(), this);				
+				append = constructMRAppendLop(getInput().get(0), getInput().get(1), getDataType(), getValueType(), cbind, this);				
 			}
 			else if(et == ExecType.SPARK) 
 			{
-				long ncol = (getInput().get(0).dimsKnown() && getInput().get(1).dimsKnown()) ? 
-						   getInput().get(0).getDim2()+getInput().get(1).getDim2() : -1; 
-				append = constructSPAppendLop(getInput().get(0), getInput().get(1), getDataType(), getValueType(), this);
-				append.getOutputParameters().setDimensions(getInput().get(0).getDim1(), ncol, getRowsInBlock(), getColsInBlock(), getNnz());
+				append = constructSPAppendLop(getInput().get(0), getInput().get(1), getDataType(), getValueType(), cbind, this);
+				append.getOutputParameters().setDimensions(rlen, clen, getRowsInBlock(), getColsInBlock(), getNnz());
 			}
 			else //CP
 			{
-				Lop offset = createOffsetLop( getInput().get(0), true ); //offset 1st input
-				append = new AppendCP(getInput().get(0).constructLops(), getInput().get(1).constructLops(), offset, getDataType(), getValueType());
-				append.getOutputParameters().setDimensions(getInput().get(0).getDim1(), getInput().get(0).getDim2()+getInput().get(1).getDim2(), 
-							                                getRowsInBlock(), getColsInBlock(), getNnz());
+				Lop offset = createOffsetLop( getInput().get(0), cbind ); //offset 1st input
+				append = new AppendCP(getInput().get(0).constructLops(), getInput().get(1).constructLops(), offset, getDataType(), getValueType(), cbind);
+				append.getOutputParameters().setDimensions(rlen, clen, getRowsInBlock(), getColsInBlock(), getNnz());
 			}
 		}
 		else //SCALAR-STRING and SCALAR-STRING (always CP)
 		{
 			append = new AppendCP(getInput().get(0).constructLops(), getInput().get(1).constructLops(), 
-				     Data.createLiteralLop(ValueType.INT, "-1"), getDataType(), getValueType());
+				     Data.createLiteralLop(ValueType.INT, "-1"), getDataType(), getValueType(), cbind);
 			append.getOutputParameters().setDimensions(0,0,-1,-1,-1);
 		}
 		
@@ -603,7 +607,6 @@ public class BinaryOp extends Hop
 			{
 				Hop left = getInput().get(0);
 				Hop right = getInput().get(1);
-				//TODO need to create spark-specific op selection for supporting binarym/binaryr
 				MMBinaryMethod mbin = optFindMMBinaryMethodSpark(left, right);
 				
 				Lop  binary = null;
@@ -760,7 +763,7 @@ public class BinaryOp extends Hop
 		if( dimsKnown() && _nnz<0 ) //never after inference
 			nnz = -1; 
 		
-		if(op==OpOp2.APPEND && !OptimizerUtils.ALLOW_DYN_RECOMPILATION && !(getDataType()==DataType.SCALAR) ) {	
+		if((op==OpOp2.CBIND || op==OpOp2.RBIND) && !OptimizerUtils.ALLOW_DYN_RECOMPILATION && !(getDataType()==DataType.SCALAR) ) {	
 			ret = OptimizerUtils.DEFAULT_SIZE;
 		}
 		else
@@ -826,8 +829,7 @@ public class BinaryOp extends Hop
 		DataType dt1 = input1.getDataType();
 		DataType dt2 = input2.getDataType();
 		
-		if( op== OpOp2.APPEND )
-		{
+		if( op== OpOp2.CBIND ) {
 			long ldim1 = -1, ldim2 = -1, lnnz = -1;
 			
 			if( mc[0].rowsKnown() || mc[1].rowsKnown() )
@@ -840,6 +842,19 @@ public class BinaryOp extends Hop
 			if( ldim1 > 0 || ldim2 > 0 || lnnz >= 0 )
 				return new long[]{ldim1, ldim2, lnnz};
 		}
+		else if( op== OpOp2.RBIND ) { //rbind: columns are shared, rows and nnz add up
+			long ldim1 = -1, ldim2 = -1, lnnz = -1;
+			
+			if( mc[0].colsKnown() || mc[1].colsKnown() ) //either input determines the output cols
+				ldim2 = mc[0].colsKnown() ? mc[0].getCols() : mc[1].getCols();
+			if( mc[0].rowsKnown() && mc[1].rowsKnown() ) //sum requires both, otherwise stays unknown (-1)
+				ldim1 = mc[0].getRows()+mc[1].getRows();
+			if( mc[0].nnzKnown() && mc[1].nnzKnown() ) //sum requires both, otherwise stays unknown (-1)
+				lnnz = mc[0].getNonZeros() + mc[1].getNonZeros();
+			
+			if( ldim1 > 0 || ldim2 > 0 || lnnz >= 0 )
+				return new long[]{ldim1, ldim2, lnnz};
+		}
 		else if ( op == OpOp2.SOLVE ) {
 			// Output is a (likely to be dense) vector of size number of columns in the first input
 			if ( mc[0].getCols() > 0 ) {
@@ -979,7 +994,7 @@ public class BinaryOp extends Hop
 		
 		//mark for recompile (forever)
 		if( OptimizerUtils.ALLOW_DYN_RECOMPILATION && ((!dimsKnown(true)&&_etype==REMOTE) 
-			|| (op == OpOp2.APPEND && getDataType()!=DataType.SCALAR) ) )
+			|| ((op == OpOp2.CBIND || op == OpOp2.RBIND) && getDataType()!=DataType.SCALAR) ) )
 		{
 			setRequiresRecompile();
 		}
@@ -997,11 +1012,12 @@ public class BinaryOp extends Hop
 	 * 
 	 * @param left
 	 * @param right
+	 * @param cbind 
 	 * @return
 	 * @throws HopsException 
 	 * @throws LopsException 
 	 */
-	public static Lop constructMRAppendLop( Hop left, Hop right, DataType dt, ValueType vt, Hop current ) 
+	public static Lop constructMRAppendLop( Hop left, Hop right, DataType dt, ValueType vt, boolean cbind, Hop current ) 
 		throws HopsException, LopsException
 	{
 		Lop ret = null;
@@ -1010,13 +1026,14 @@ public class BinaryOp extends Hop
 		long m1_dim2 = left.getDim2();		
 		long m2_dim1 = right.getDim1();
 		long m2_dim2 = right.getDim2();
-		long m3_dim2 = (m1_dim2>0 && m2_dim2>0) ? (m1_dim2 + m2_dim2) : -1; //output cols
+		long m3_dim1 = cbind ? m1_dim1 : ((m1_dim1>0 && m2_dim1>0) ? (m1_dim1 + m2_dim1) : -1); //output rows
+		long m3_dim2 = cbind ? ((m1_dim2>0 && m2_dim2>0) ? (m1_dim2 + m2_dim2) : -1): m1_dim2; //output cols
 		long m3_nnz = (left.getNnz()>0 && right.getNnz()>0) ? (left.getNnz() + right.getNnz()) : -1; //output nnz
 		long brlen = left.getRowsInBlock();
 		long bclen = left.getColsInBlock();
 		
-		Lop offset = createOffsetLop( left, true ); //offset 1st input
-		AppendMethod am = optFindAppendMethod(m1_dim1, m1_dim2, m2_dim1, m2_dim2, brlen, bclen);
+		Lop offset = createOffsetLop( left, cbind ); //offset 1st input
+		AppendMethod am = optFindAppendMethod(m1_dim1, m1_dim2, m2_dim1, m2_dim2, brlen, bclen, cbind);
 	
 		switch( am )
 		{
@@ -1034,9 +1051,9 @@ public class BinaryOp extends Hop
 					dcInput.setAllPositions(right.getBeginLine(), right.getBeginColumn(), right.getEndLine(), right.getEndColumn());
 				}					
 				
-				AppendM appM = new AppendM(left.constructLops(), dcInput, offset, dt, vt, needPart, ExecType.MR);
+				AppendM appM = new AppendM(left.constructLops(), dcInput, offset, dt, vt, cbind, needPart, ExecType.MR);
 				appM.setAllPositions(current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
-				appM.getOutputParameters().setDimensions(m1_dim1, m3_dim2, brlen, bclen, m3_nnz);
+				appM.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
 				ret = appM;
 				break;
 			}
@@ -1051,8 +1068,8 @@ public class BinaryOp extends Hop
 				group1.getOutputParameters().setDimensions(m2_dim1, m2_dim2, brlen, bclen, right.getNnz());
 				group1.setAllPositions(right.getBeginLine(), right.getBeginColumn(), right.getEndLine(), right.getEndColumn());
 				
-				AppendR appR = new AppendR(group1, group2, dt, vt, ExecType.MR);
-				appR.getOutputParameters().setDimensions(m1_dim1, m3_dim2, brlen, bclen, m3_nnz);
+				AppendR appR = new AppendR(group1, group2, dt, vt, cbind, ExecType.MR);
+				appR.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
 				appR.setAllPositions(current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
 				
 				ret = appR;
@@ -1061,20 +1078,20 @@ public class BinaryOp extends Hop
 			case MR_GAPPEND:
 			{
 				//general case: map expand append, reduce aggregate
-				Lop offset2 = createOffsetLop( right, true ); //offset second input
+				Lop offset2 = createOffsetLop( right, cbind ); //offset second input
 				
-				AppendG appG = new AppendG(left.constructLops(), right.constructLops(),	offset, offset2, dt, vt, ExecType.MR);
-				appG.getOutputParameters().setDimensions(m1_dim1, m3_dim2, brlen, bclen, m3_nnz);
+				AppendG appG = new AppendG(left.constructLops(), right.constructLops(),	offset, offset2, dt, vt, cbind, ExecType.MR);
+				appG.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
 				appG.setAllPositions(current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
 				
 				//group
 				Group group1 = new Group(appG, Group.OperationTypes.Sort, DataType.MATRIX, vt);
-				group1.getOutputParameters().setDimensions(m1_dim1, m3_dim2, brlen, bclen, m3_nnz);
+				group1.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
 				group1.setAllPositions(current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
 				
 				//aggregate
 				Aggregate agg1 = new Aggregate(group1, Aggregate.OperationTypes.Sum, DataType.MATRIX, vt, ExecType.MR);
-				agg1.getOutputParameters().setDimensions(m1_dim1, m3_dim2, brlen, bclen, m3_nnz);
+				agg1.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
 				agg1.setAllPositions(current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
 				ret = agg1;
 				break;
@@ -1097,36 +1114,40 @@ public class BinaryOp extends Hop
 	 * @throws HopsException
 	 * @throws LopsException
 	 */
-	public Lop constructSPAppendLop( Hop left, Hop right, DataType dt, ValueType vt, Hop current ) 
+	public static Lop constructSPAppendLop( Hop left, Hop right, DataType dt, ValueType vt, boolean cbind, Hop current ) 
 		throws HopsException, LopsException
 	{
 		Lop ret = null;
 		
-		Lop offset = createOffsetLop( left, true ); //offset 1st input
+		Lop offset = createOffsetLop( left, cbind ); //offset 1st input
 		AppendMethod am = optFindAppendSPMethod(left.getDim1(), left.getDim2(), right.getDim1(), right.getDim2(), 
-				right.getRowsInBlock(), right.getColsInBlock(), right.getNnz());
+				right.getRowsInBlock(), right.getColsInBlock(), right.getNnz(), cbind);
 	
 		switch( am )
 		{
 			case MR_MAPPEND: //special case map-only append
 			{
-				ret = new AppendM(left.constructLops(), right.constructLops(), offset, getDataType(), getValueType(), false, ExecType.SPARK);
+				ret = new AppendM(left.constructLops(), right.constructLops(), offset, 
+						current.getDataType(), current.getValueType(), cbind, false, ExecType.SPARK);
 				break;
 			}
 			case MR_RAPPEND: //special case reduce append w/ one column block
 			{
-				ret = new AppendR(left.constructLops(), right.constructLops(), getDataType(), getValueType(), ExecType.SPARK);
+				ret = new AppendR(left.constructLops(), right.constructLops(), 
+						current.getDataType(), current.getValueType(), cbind, ExecType.SPARK);
 				break;
 			}	
 			case MR_GAPPEND:
 			{
-				Lop offset2 = createOffsetLop( right, true ); //offset second input
-				ret = new AppendG(left.constructLops(), right.constructLops(), offset, offset2, getDataType(), getValueType(), ExecType.SPARK);
+				Lop offset2 = createOffsetLop( right, cbind ); //offset second input
+				ret = new AppendG(left.constructLops(), right.constructLops(), offset, offset2, 
+						current.getDataType(), current.getValueType(), cbind, ExecType.SPARK);
 				break;
 			}
 			case SP_GAlignedAppend:
 			{
-				ret = new AppendGAlignedSP(left.constructLops(), right.constructLops(), offset, getDataType(), getValueType());
+				ret = new AppendGAlignedSP(left.constructLops(), right.constructLops(), offset, 
+						current.getDataType(), current.getValueType(), cbind);
 				break;
 			}
 			default:
@@ -1151,7 +1172,7 @@ public class BinaryOp extends Hop
 	 * @throws HopsException
 	 * @throws LopsException
 	 */
-	public static Lop constructAppendLopChain( Hop left, Hop right1, Hop right2, DataType dt, ValueType vt, Hop current ) 
+	public static Lop constructAppendLopChain( Hop left, Hop right1, Hop right2, DataType dt, ValueType vt, boolean cbind, Hop current ) 
 		throws HopsException, LopsException
 	{
 		long m1_dim1 = left.getDim1();
@@ -1188,11 +1209,11 @@ public class BinaryOp extends Hop
 		group1.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, right2.getNnz());
 		group1.setAllPositions(right2.getBeginLine(), right2.getBeginColumn(), right2.getEndLine(), right2.getEndColumn());
 		
-		AppendR appR1 = new AppendR(group1, group2, dt, vt, ExecType.MR);
+		AppendR appR1 = new AppendR(group1, group2, dt, vt, cbind, ExecType.MR);
 		appR1.getOutputParameters().setDimensions(m1_dim1, m41_dim2, brlen, bclen, m41_nnz);
 		appR1.setAllPositions(current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
 		
-		AppendR appR2 = new AppendR(appR1, group3, dt, vt, ExecType.MR);
+		AppendR appR2 = new AppendR(appR1, group3, dt, vt, cbind, ExecType.MR);
 		appR1.getOutputParameters().setDimensions(m1_dim1, m42_dim2, brlen, bclen, m42_nnz);
 		appR1.setAllPositions(current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
 	
@@ -1227,15 +1248,16 @@ public class BinaryOp extends Hop
 	 * @param m2_dim2
 	 * @return
 	 */
-	private static AppendMethod optFindAppendMethod( long m1_dim1, long m1_dim2, long m2_dim1, long m2_dim2, long m1_rpb, long m1_cpb )
+	private static AppendMethod optFindAppendMethod( long m1_dim1, long m1_dim2, long m2_dim1, long m2_dim2, long m1_rpb, long m1_cpb, boolean cbind )
 	{
 		if(FORCED_APPEND_METHOD != null) {
 			return FORCED_APPEND_METHOD;
 		}
 		
 		//check for best case (map-only)		
-		if(    m2_dim1 >= 1 && m2_dim2 >= 1 // rhs dims known 				
-			&& m2_dim2 <= m1_cpb  ) //rhs is smaller than column block 
+		if(    m2_dim1 >= 1 && m2_dim2 >= 1   //rhs dims known 				
+			&& (cbind && m2_dim2 <= m1_cpb    //rhs is smaller than column block 
+			|| !cbind && m2_dim1 <= m1_rpb) ) //rhs is smaller than row block
 		{
 			double footprint = BinaryOp.footprintInMapper(m1_dim1, m1_dim2, m2_dim1, m2_dim2, m1_rpb, m1_cpb);
 			if ( footprint < APPEND_MEM_MULTIPLIER * OptimizerUtils.getRemoteMemBudgetMap(true) )
@@ -1243,8 +1265,10 @@ public class BinaryOp extends Hop
 		}
 		
 		//check for in-block append (reduce-only)
-		if( m1_dim2 >= 1 && m2_dim2 >= 0 //column dims known
-			&& m1_dim2+m2_dim2 <= m1_cpb ) //output has one column block
+		if( cbind && m1_dim2 >= 1 && m2_dim2 >= 0  //column dims known
+			&& m1_dim2+m2_dim2 <= m1_cpb   //output has one column block
+		  ||!cbind && m1_dim1 >= 1 && m2_dim1 >= 0 //row dims known
+			&& m1_dim1+m2_dim1 <= m1_rpb ) //output has one row block
 		{
 			return AppendMethod.MR_RAPPEND;
 		}
@@ -1253,30 +1277,35 @@ public class BinaryOp extends Hop
 		return AppendMethod.MR_GAPPEND; 	
 	}
 	
-	private static AppendMethod optFindAppendSPMethod( long m1_dim1, long m1_dim2, long m2_dim1, long m2_dim2, long m2_rpb, long m2_cpb, long m2_nnz )
+	private static AppendMethod optFindAppendSPMethod( long m1_dim1, long m1_dim2, long m2_dim1, long m2_dim2, long m1_rpb, long m1_cpb, long m2_nnz, boolean cbind )
 	{
 		if(FORCED_APPEND_METHOD != null) {
 			return FORCED_APPEND_METHOD;
 		}
 		
 		//check for best case (map-only w/o shuffle)		
-		if(    m2_dim1 >= 1 && m2_dim2 >= 1 // rhs dims known 				
-			&& m2_dim2 <= m2_cpb  ) //rhs is smaller than column block 
+		if(    m2_dim1 >= 1 && m2_dim2 >= 1   //rhs dims known 				
+			&& (cbind && m2_dim2 <= m1_cpb    //rhs is smaller than column block 
+			|| !cbind && m2_dim1 <= m1_rpb) ) //rhs is smaller than row block
 		{
-			if( OptimizerUtils.checkSparkBroadcastMemoryBudget(m2_dim1, m2_dim2, m2_rpb, m2_cpb, m2_nnz) ) {
+			if( OptimizerUtils.checkSparkBroadcastMemoryBudget(m2_dim1, m2_dim2, m1_rpb, m1_cpb, m2_nnz) ) {
 				return AppendMethod.MR_MAPPEND;
 			}
 		}
 		
 		//check for in-block append (reduce-only)
-		if( m1_dim2 >= 1 && m2_dim2 >= 0 //column dims known
-			&& m1_dim2+m2_dim2 <= m2_cpb ) //output has one column block
+		if( cbind && m1_dim2 >= 1 && m2_dim2 >= 0  //column dims known
+			&& m1_dim2+m2_dim2 <= m1_cpb   //output has one column block
+		  ||!cbind && m1_dim1 >= 1 && m2_dim1 >= 0 //row dims known
+			&& m1_dim1+m2_dim1 <= m1_rpb ) //output has one row block
 		{
 			return AppendMethod.MR_RAPPEND;
 		}
 		
 		// if(mc1.getCols() % mc1.getColsPerBlock() == 0) {
-		if(m1_dim2 % m2_cpb == 0) {
+		if( cbind && m1_dim2 % m1_cpb == 0 
+		   || !cbind && m1_dim1 % m1_rpb == 0 ) 
+		{
 			return AppendMethod.SP_GAlignedAppend;
 		}
 		
@@ -1400,7 +1429,7 @@ public class BinaryOp extends Hop
 		else //MATRIX OUTPUT
 		{
 			//TODO quantile
-			if( op == OpOp2.APPEND )
+			if( op == OpOp2.CBIND )
 			{
 				setDim1( (input1.getDim1()>0) ? input1.getDim1() : input2.getDim1() );
 					
@@ -1411,6 +1440,17 @@ public class BinaryOp extends Hop
 				if( input1.getNnz()>0 && input2.getNnz()>0 )
 					setNnz( input1.getNnz() + input2.getNnz() );
 			}
+			else if( op == OpOp2.RBIND )
+			{
+				setDim2( (input1.getDim2()>0) ? input1.getDim2() : input2.getDim2() );
+					
+				//ensure both rows are known, otherwise dangerous underestimation due to +(-1)
+				if( input1.getDim1()>0 && input2.getDim1()>0 )
+					setDim1( input1.getDim1() + input2.getDim1() );
+				//ensure both nnz are known, otherwise dangerous underestimation due to +(-1)
+				if( input1.getNnz()>0 && input2.getNnz()>0 )
+					setNnz( input1.getNnz() + input2.getNnz() );
+			}
 			else if ( op == OpOp2.SOLVE )
 			{
 				//normally the second input would be of equal size as the output 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/hops/Hop.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/hops/Hop.java b/src/main/java/com/ibm/bi/dml/hops/Hop.java
index 0a801d0..16ffda0 100644
--- a/src/main/java/com/ibm/bi/dml/hops/Hop.java
+++ b/src/main/java/com/ibm/bi/dml/hops/Hop.java
@@ -1036,7 +1036,7 @@ public abstract class Hop
 	public enum OpOp2 {
 		PLUS, MINUS, MULT, DIV, MODULUS, INTDIV, LESS, LESSEQUAL, GREATER, GREATEREQUAL, EQUAL, NOTEQUAL, 
 		MIN, MAX, AND, OR, LOG, POW, PRINT, CONCAT, QUANTILE, INTERQUANTILE, IQM, 
-		CENTRALMOMENT, COVARIANCE, APPEND, SEQINCR, SOLVE, MEDIAN, INVALID,
+		CENTRALMOMENT, COVARIANCE, CBIND, RBIND, SEQINCR, SOLVE, MEDIAN, INVALID,
 		//fused ML-specific operators for performance
 		MINUS_NZ, //sparse-safe minus: X-(mean*ppred(X,0,!=))
 		LOG_NZ, //sparse-safe log; ppred(X,0,"!=")*log(X,0.5)
@@ -1346,7 +1346,8 @@ public abstract class Hop
 		HopsOpOp2String.put(OpOp2.MEDIAN, "median");
 		HopsOpOp2String.put(OpOp2.CENTRALMOMENT, "cm");
 		HopsOpOp2String.put(OpOp2.COVARIANCE, "cov");
-		HopsOpOp2String.put(OpOp2.APPEND, "app");
+		HopsOpOp2String.put(OpOp2.CBIND, "cbind");
+		HopsOpOp2String.put(OpOp2.RBIND, "rbind");
 		HopsOpOp2String.put(OpOp2.SOLVE, "solve");
 		HopsOpOp2String.put(OpOp2.SEQINCR, "seqincr");
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/hops/ParameterizedBuiltinOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/hops/ParameterizedBuiltinOp.java b/src/main/java/com/ibm/bi/dml/hops/ParameterizedBuiltinOp.java
index 36e8e2b..12dc87e 100644
--- a/src/main/java/com/ibm/bi/dml/hops/ParameterizedBuiltinOp.java
+++ b/src/main/java/com/ibm/bi/dml/hops/ParameterizedBuiltinOp.java
@@ -223,7 +223,7 @@ public class ParameterizedBuiltinOp extends Hop
 						getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)), 
 						getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS)),
 						getInput().get(_paramIndexMap.get(Statement.GAGG_WEIGHTS)),
-						DataType.MATRIX, getValueType(), 
+						DataType.MATRIX, getValueType(), true,
 						getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)));
 
 				// add the combine lop to parameter list, with a new name "combinedinput"
@@ -238,7 +238,7 @@ public class ParameterizedBuiltinOp extends Hop
 				Lop append = BinaryOp.constructMRAppendLop(
 						getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)), 
 						getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS)), 
-						DataType.MATRIX, getValueType(), 
+						DataType.MATRIX, getValueType(), true,
 						getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)));
 				
 				// add the combine lop to parameter list, with a new name

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/hops/rewrite/RewriteConstantFolding.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/hops/rewrite/RewriteConstantFolding.java b/src/main/java/com/ibm/bi/dml/hops/rewrite/RewriteConstantFolding.java
index da80755..60db0fd 100644
--- a/src/main/java/com/ibm/bi/dml/hops/rewrite/RewriteConstantFolding.java
+++ b/src/main/java/com/ibm/bi/dml/hops/rewrite/RewriteConstantFolding.java
@@ -277,7 +277,8 @@ public class RewriteConstantFolding extends HopRewriteRule
 		return (   hop instanceof BinaryOp 
 				&& in.get(0) instanceof LiteralOp 
 				&& in.get(1) instanceof LiteralOp
-				&& ((BinaryOp)hop).getOp()!=OpOp2.APPEND );
+				&& ((BinaryOp)hop).getOp()!=OpOp2.CBIND
+				&& ((BinaryOp)hop).getOp()!=OpOp2.RBIND);
 		
 		//string append is rejected although possible because it
 		//messes up the explain runtime output due to introduced \n 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/lops/AppendCP.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/lops/AppendCP.java b/src/main/java/com/ibm/bi/dml/lops/AppendCP.java
index 6180813..f3108e5 100644
--- a/src/main/java/com/ibm/bi/dml/lops/AppendCP.java
+++ b/src/main/java/com/ibm/bi/dml/lops/AppendCP.java
@@ -25,24 +25,27 @@ import com.ibm.bi.dml.parser.Expression.*;
 
 public class AppendCP extends Lop
 {
-
 	public static final String OPCODE = "append";
 	
-	public AppendCP(Lop input1, Lop input2, Lop input3, DataType dt, ValueType vt) 
+	private boolean _cbind = true;
+	
+	public AppendCP(Lop input1, Lop input2, Lop input3, DataType dt, ValueType vt, boolean cbind) 
 	{
 		super(Lop.Type.Append, dt, vt);
 		init(input1, input2, input3, dt, vt);
+		
+		_cbind = cbind;
 	}
 	
 	public void init(Lop input1, Lop input2, Lop input3, DataType dt, ValueType vt) 
 	{
-		this.addInput(input1);
+		addInput(input1);
 		input1.addOutput(this);
 
-		this.addInput(input2);
+		addInput(input2);
 		input2.addOutput(this);
 		
-		this.addInput(input3);
+		addInput(input3);
 		input3.addOutput(this);
 		
 		boolean breaksAlignment = false;
@@ -50,7 +53,7 @@ public class AppendCP extends Lop
 		boolean definesMRJob = false;
 		
 		lps.addCompatibility(JobType.INVALID);
-		this.lps.setProperties( inputs, ExecType.CP, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
+		lps.setProperties( inputs, ExecType.CP, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
 	}
 	
 	@Override
@@ -64,7 +67,7 @@ public class AppendCP extends Lop
 		throws LopsException
 	{
 		StringBuilder sb = new StringBuilder();
-		sb.append( this.lps.execType );
+		sb.append( getExecType() );
 		sb.append( OPERAND_DELIMITOR );
 		sb.append( "append" );
 		
@@ -78,9 +81,11 @@ public class AppendCP extends Lop
 		sb.append( getInputs().get(2).prepScalarInputOperand(getExecType()));
 		
 		sb.append( OPERAND_DELIMITOR );
-		sb.append( this.prepOutputOperand(output_index+"") );
+		sb.append( prepOutputOperand(output_index+"") );
+		
+		sb.append( OPERAND_DELIMITOR );
+		sb.append( _cbind );
 		
 		return sb.toString();
 	}
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/lops/AppendG.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/lops/AppendG.java b/src/main/java/com/ibm/bi/dml/lops/AppendG.java
index d33619f..f254aa0 100644
--- a/src/main/java/com/ibm/bi/dml/lops/AppendG.java
+++ b/src/main/java/com/ibm/bi/dml/lops/AppendG.java
@@ -36,13 +36,16 @@ import com.ibm.bi.dml.parser.Expression.*;
  */
 public class AppendG extends Lop
 {
-	
 	public static final String OPCODE = "gappend";
 	
-	public AppendG(Lop input1, Lop input2, Lop input3, Lop input4, DataType dt, ValueType vt, ExecType et) 
+	private boolean _cbind = true;
+	
+	public AppendG(Lop input1, Lop input2, Lop input3, Lop input4, DataType dt, ValueType vt, boolean cbind, ExecType et) 
 	{
 		super(Lop.Type.Append, dt, vt);
 		init(input1, input2, input3, input4, dt, vt, et);
+		
+		_cbind = cbind;
 	}
 	
 	public void init(Lop input1, Lop input2, Lop input3, Lop input4, DataType dt, ValueType vt, ExecType et) 
@@ -84,27 +87,12 @@ public class AppendG extends Lop
 	public String getInstructions(int input_index1, int input_index2, int input_index3, int input_index4, int output_index) 
 		throws LopsException
 	{
-		StringBuilder sb = new StringBuilder();
-		sb.append( getExecType() );
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( OPCODE );
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( getInputs().get(0).prepInputOperand(input_index1+""));
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( getInputs().get(1).prepInputOperand(input_index2+""));
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( getInputs().get(2).prepScalarInputOperand(getExecType()));
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( getInputs().get(3).prepScalarInputOperand(getExecType()));
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( this.prepOutputOperand(output_index+"") );
-		
-		return sb.toString();	
+		return getInstructions(
+				String.valueOf(input_index1),
+				String.valueOf(input_index2),
+				String.valueOf(input_index3),
+				String.valueOf(input_index4),
+				String.valueOf(output_index) );
 	}
 	
 	//called when append executes in SP
@@ -130,7 +118,10 @@ public class AppendG extends Lop
 		sb.append( getInputs().get(3).prepScalarInputOperand(getExecType()));
 		
 		sb.append( OPERAND_DELIMITOR );
-		sb.append( this.prepOutputOperand(output_index+"") );
+		sb.append( prepOutputOperand(output_index+"") );
+		
+		sb.append( OPERAND_DELIMITOR );
+		sb.append( _cbind );
 		
 		return sb.toString();
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/lops/AppendGAlignedSP.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/lops/AppendGAlignedSP.java b/src/main/java/com/ibm/bi/dml/lops/AppendGAlignedSP.java
index 66d6994..b1b86c1 100644
--- a/src/main/java/com/ibm/bi/dml/lops/AppendGAlignedSP.java
+++ b/src/main/java/com/ibm/bi/dml/lops/AppendGAlignedSP.java
@@ -8,24 +8,27 @@ import com.ibm.bi.dml.parser.Expression.ValueType;
 
 public class AppendGAlignedSP extends Lop
 {
-
 	public static final String OPCODE = "galignedappend";
+
+	private boolean _cbind = true;
 	
-	public AppendGAlignedSP(Lop input1, Lop input2, Lop input3, DataType dt, ValueType vt) 
+	public AppendGAlignedSP(Lop input1, Lop input2, Lop input3, DataType dt, ValueType vt, boolean cbind) 
 	{
-		super(Lop.Type.Append, dt, vt);
+		super(Lop.Type.Append, dt, vt);		
 		init(input1, input2, input3, dt, vt);
+		
+		_cbind = cbind; //honor the caller's cbind/rbind choice; it is serialized into the instruction string
 	}
 	
 	public void init(Lop input1, Lop input2, Lop input3, DataType dt, ValueType vt) 
 	{
-		this.addInput(input1);
+		addInput(input1);
 		input1.addOutput(this);
 
-		this.addInput(input2);
+		addInput(input2);
 		input2.addOutput(this);
 		
-		this.addInput(input3);
+		addInput(input3);
 		input3.addOutput(this);
 		
 		boolean breaksAlignment = false;
@@ -33,12 +36,11 @@ public class AppendGAlignedSP extends Lop
 		boolean definesMRJob = false;
 		
 		lps.addCompatibility(JobType.INVALID);
-		this.lps.setProperties( inputs, ExecType.SPARK, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
+		lps.setProperties( inputs, ExecType.SPARK, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
 	}
 	
 	@Override
 	public String toString() {
-
 		return " AppendGSP: ";
 	}
 
@@ -47,7 +49,7 @@ public class AppendGAlignedSP extends Lop
 		throws LopsException
 	{
 		StringBuilder sb = new StringBuilder();
-		sb.append( this.lps.execType );
+		sb.append( getExecType() );
 		sb.append( OPERAND_DELIMITOR );
 		sb.append( OPCODE );
 		
@@ -61,7 +63,10 @@ public class AppendGAlignedSP extends Lop
 		sb.append( getInputs().get(2).prepScalarInputOperand(getExecType()));
 		
 		sb.append( OPERAND_DELIMITOR );
-		sb.append( this.prepOutputOperand(output_index+"") );
+		sb.append( prepOutputOperand(output_index+"") );
+		
+		sb.append( OPERAND_DELIMITOR );
+		sb.append( _cbind );
 		
 		return sb.toString();
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/lops/AppendM.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/lops/AppendM.java b/src/main/java/com/ibm/bi/dml/lops/AppendM.java
index 6a8acc6..ab364ca 100644
--- a/src/main/java/com/ibm/bi/dml/lops/AppendM.java
+++ b/src/main/java/com/ibm/bi/dml/lops/AppendM.java
@@ -25,23 +25,22 @@ import com.ibm.bi.dml.parser.Expression.*;
 
 public class AppendM extends Lop
 {
-	
 	public static final String OPCODE = "mappend";
 	
 	public enum CacheType {
 		RIGHT,
 		RIGHT_PART,
 	}
-
-	private CacheType _cacheType = null;
 	
+	private boolean _cbind = true;
+	private CacheType _cacheType = null;
 	
-	public AppendM(Lop input1, Lop input2, Lop input3, DataType dt, ValueType vt, boolean partitioned, ExecType et) 
+	public AppendM(Lop input1, Lop input2, Lop input3, DataType dt, ValueType vt, boolean cbind, boolean partitioned, ExecType et) 
 	{
 		super(Lop.Type.Append, dt, vt);
 		init(input1, input2, input3, dt, vt, et);
 		
-		//partitioned right input
+		_cbind = cbind;
 		_cacheType = partitioned ? CacheType.RIGHT_PART : CacheType.RIGHT;
 	}
 	
@@ -78,30 +77,14 @@ public class AppendM extends Lop
 	}
 
 	//called when append executes in MR
-	public String getInstructions(int input_index1, int input_index2, int input_index3, int output_index) throws LopsException
+	public String getInstructions(int input_index1, int input_index2, int input_index3, int output_index) 
+		throws LopsException
 	{
-		StringBuilder sb = new StringBuilder();
-		sb.append( getExecType() );
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( OPCODE );
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( getInputs().get(0).prepInputOperand(input_index1+""));
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( getInputs().get(1).prepInputOperand(input_index2+""));
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( getInputs().get(2).prepScalarInputOperand(getExecType()));
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( this.prepOutputOperand(output_index+"") );
-		
-		sb.append(Lop.OPERAND_DELIMITOR);
-		sb.append(_cacheType);
-		
-		return sb.toString();	
+		return getInstructions(
+				String.valueOf(input_index1),
+				String.valueOf(input_index2),
+				String.valueOf(input_index3),
+				String.valueOf(output_index) );
 	}
 
 
@@ -125,10 +108,17 @@ public class AppendM extends Lop
 		sb.append( getInputs().get(2).prepScalarInputOperand(getExecType()));
 		
 		sb.append( OPERAND_DELIMITOR );
-		sb.append( this.prepOutputOperand(output) );
+		sb.append( prepOutputOperand(output) );
 
 		//note: for SP: no cache type
+		if( getExecType()==ExecType.MR ){
+			sb.append(Lop.OPERAND_DELIMITOR);
+			sb.append(_cacheType);	
+		}
 		
+		sb.append( OPERAND_DELIMITOR );
+		sb.append( _cbind );
+				
 		return sb.toString();
 	}
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/lops/AppendR.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/lops/AppendR.java b/src/main/java/com/ibm/bi/dml/lops/AppendR.java
index 4cbc5f2..94b83bf 100644
--- a/src/main/java/com/ibm/bi/dml/lops/AppendR.java
+++ b/src/main/java/com/ibm/bi/dml/lops/AppendR.java
@@ -24,14 +24,17 @@ import com.ibm.bi.dml.parser.Expression.*;
 
 
 public class AppendR extends Lop
-{
-	
+{	
 	public static final String OPCODE = "rappend";
 	
-	public AppendR(Lop input1, Lop input2, DataType dt, ValueType vt, ExecType et) 
+	private boolean _cbind = true;
+	
+	public AppendR(Lop input1, Lop input2, DataType dt, ValueType vt, boolean cbind, ExecType et) 
 	{
 		super(Lop.Type.Append, dt, vt);
 		init(input1, input2, dt, vt, et);
+		
+		_cbind = cbind;
 	}
 	
 	public void init(Lop input1, Lop input2, DataType dt, ValueType vt, ExecType et) 
@@ -68,22 +71,10 @@ public class AppendR extends Lop
 	public String getInstructions(int input_index1, int input_index2, int output_index) 
 		throws LopsException
 	{
-		StringBuilder sb = new StringBuilder();
-		sb.append( getExecType() );
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( OPCODE );
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( getInputs().get(0).prepInputOperand(input_index1+""));
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( getInputs().get(1).prepInputOperand(input_index2+""));
-		
-		sb.append( OPERAND_DELIMITOR );
-		sb.append( prepOutputOperand(output_index+"") );
-		
-		return sb.toString();	
+		return getInstructions(
+				String.valueOf(input_index1),
+				String.valueOf(input_index2),
+				String.valueOf(output_index) );
 	}
 	
 	//called when append executes in CP
@@ -105,6 +96,9 @@ public class AppendR extends Lop
 		sb.append( OPERAND_DELIMITOR );
 		sb.append( prepOutputOperand(output_index+"") );
 		
+		sb.append( OPERAND_DELIMITOR );
+		sb.append( _cbind );
+		
 		return sb.toString();
 	}
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/parser/BuiltinFunctionExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/parser/BuiltinFunctionExpression.java b/src/main/java/com/ibm/bi/dml/parser/BuiltinFunctionExpression.java
index fa61049..2f39797 100644
--- a/src/main/java/com/ibm/bi/dml/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/com/ibm/bi/dml/parser/BuiltinFunctionExpression.java
@@ -386,21 +386,21 @@ public class BuiltinFunctionExpression extends DataIdentifier
 			output.setDimensions(0, 0);
 			output.setBlockDimensions (0, 0);
 			output.setValueType(ValueType.BOOLEAN);
-			break;	
-		case APPEND:
+			break;
+			
+		case CBIND:
+		case RBIND:	
 			checkNumParameters(2);
 			
 			//scalar string append (string concatenation with \n)
-			if( getFirstExpr().getOutput().getDataType()==DataType.SCALAR )
-			{
+			if( getFirstExpr().getOutput().getDataType()==DataType.SCALAR ) {
 				checkScalarParam(getFirstExpr());
 				checkScalarParam(getSecondExpr());
 				checkValueTypeParam(getFirstExpr(), ValueType.STRING);
 				checkValueTypeParam(getSecondExpr(), ValueType.STRING);
 			}
-			//matrix append (cbind)
-			else
-			{				
+			//matrix append (rbind/cbind)
+			else {				
 				checkMatrixParam(getFirstExpr());
 				checkMatrixParam(getSecondExpr());
 			}
@@ -408,29 +408,35 @@ public class BuiltinFunctionExpression extends DataIdentifier
 			output.setDataType(id.getDataType());
 			output.setValueType(id.getValueType());
 			
-			// set output dimensions
+			// set output dimensions and validate consistency
 			long appendDim1 = -1, appendDim2 = -1;
-			if (getFirstExpr().getOutput().getDim1() > 0 && getSecondExpr().getOutput().getDim1() > 0){
-				if (getFirstExpr().getOutput().getDim1() != getSecondExpr().getOutput().getDim1()){
-					raiseValidateError("inputs to append must have same number of rows: input 1 rows: " + 
-							getFirstExpr().getOutput().getDim1() +  ", input 2 rows " + getSecondExpr().getOutput().getDim1(), 
-							 conditional, LanguageErrorCodes.INVALID_PARAMETERS);
-				}
-				appendDim1 = getFirstExpr().getOutput().getDim1();
+			long m1rlen = getFirstExpr().getOutput().getDim1();
+			long m1clen = getFirstExpr().getOutput().getDim2();
+			long m2rlen = getSecondExpr().getOutput().getDim1();
+			long m2clen = getSecondExpr().getOutput().getDim2();
+			
+			if( getOpCode() == BuiltinFunctionOp.CBIND ) {
+				if (m1rlen > 0 && m2rlen > 0 && m1rlen!=m2rlen) {
+					raiseValidateError("inputs to cbind must have same number of rows: input 1 rows: " + 
+						m1rlen+", input 2 rows: "+m2rlen, conditional, LanguageErrorCodes.INVALID_PARAMETERS);
+				}				
+				appendDim1 = (m1rlen>0) ? m1rlen : m2rlen;
+				appendDim2 = (m1clen>0 && m2clen>0)? m1clen + m2clen : -1;
 			}
-			else if (getFirstExpr().getOutput().getDim1() > 0)	
-				appendDim1 = getFirstExpr().getOutput().getDim1(); 
-			else if (getSecondExpr().getOutput().getDim1() > 0 )
-				appendDim1 = getSecondExpr().getOutput().getDim1(); 
-				
-			if (getFirstExpr().getOutput().getDim2() > 0 && getSecondExpr().getOutput().getDim2() > 0){
-				appendDim2 = getFirstExpr().getOutput().getDim2() + getSecondExpr().getOutput().getDim2();
+			else if( getOpCode() == BuiltinFunctionOp.RBIND ) {
+				if (m1clen > 0 && m2clen > 0 && m1clen!=m2clen) {
+					raiseValidateError("inputs to rbind must have same number of columns: input 1 columns: " + 
+						m1clen+", input 2 columns: "+m2clen, conditional, LanguageErrorCodes.INVALID_PARAMETERS);
+				}				
+				appendDim1 = (m1rlen>0 && m2rlen>0)? m1rlen + m2rlen : -1;
+				appendDim2 = (m1clen>0) ? m1clen : m2clen;
 			}
 			
-			output.setDimensions(appendDim1, appendDim2); 
-			
+			output.setDimensions(appendDim1, appendDim2); 			
 			output.setBlockDimensions (id.getRowsInBlock(), id.getColumnsInBlock());
+			
 			break;
+			
 		case PPRED:
 			// ppred (X,Y, "<"); ppred (X,y, "<"); ppred (y,X, "<");
 			checkNumParameters(3);
@@ -1301,8 +1307,10 @@ public class BuiltinFunctionExpression extends DataIdentifier
 			bifop = Expression.BuiltinFunctionOp.TRACE;
 		else if (functionName.equals("t"))
 			 bifop = Expression.BuiltinFunctionOp.TRANS;
-		else if (functionName.equals("append"))
-			bifop = Expression.BuiltinFunctionOp.APPEND;
+		else if (functionName.equals("cbind") || functionName.equals("append"))
+			bifop = Expression.BuiltinFunctionOp.CBIND;
+		else if (functionName.equals("rbind"))
+			bifop = Expression.BuiltinFunctionOp.RBIND;
 		else if (functionName.equals("range"))
 			bifop = Expression.BuiltinFunctionOp.RANGE;
 		else if (functionName.equals("prod"))

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/parser/DMLTranslator.java b/src/main/java/com/ibm/bi/dml/parser/DMLTranslator.java
index 4949a48..4163ca9 100644
--- a/src/main/java/com/ibm/bi/dml/parser/DMLTranslator.java
+++ b/src/main/java/com/ibm/bi/dml/parser/DMLTranslator.java
@@ -2341,9 +2341,14 @@ public class DMLTranslator
 					                    Hop.ReOrgOp.TRANSPOSE, expr);
 			break;
 			
-		case APPEND:
+		case CBIND:
 			currBuiltinOp = new BinaryOp(target.getName(), target.getDataType(), target.getValueType(), 
-										Hop.OpOp2.APPEND, expr, expr2);
+										Hop.OpOp2.CBIND, expr, expr2);
+			break;
+		
+		case RBIND:
+			currBuiltinOp = new BinaryOp(target.getName(), target.getDataType(), target.getValueType(), 
+										Hop.OpOp2.RBIND, expr, expr2);
 			break;
 		
 		case DIAG:

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/parser/Expression.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/parser/Expression.java b/src/main/java/com/ibm/bi/dml/parser/Expression.java
index 459a4ba..59019ee 100644
--- a/src/main/java/com/ibm/bi/dml/parser/Expression.java
+++ b/src/main/java/com/ibm/bi/dml/parser/Expression.java
@@ -45,8 +45,7 @@ public abstract class Expression
 		CONDITIONALAND, CONDITIONALOR, LOGICALAND, LOGICALOR, NOT, INVALID
 	};
 
-	public enum BuiltinFunctionOp {
-		APPEND, 
+	public enum BuiltinFunctionOp { 
 		ABS, 
 		ACOS,
 		ASIN, 
@@ -105,6 +104,8 @@ public abstract class Expression
 		SOLVE,
 		CEIL,
 		FLOOR,
+		CBIND, //previously APPEND
+		RBIND,
 		MEDIAN,
 		INVERSE,
 		SAMPLE

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/runtime/instructions/cp/AppendCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/runtime/instructions/cp/AppendCPInstruction.java b/src/main/java/com/ibm/bi/dml/runtime/instructions/cp/AppendCPInstruction.java
index b3f9b4a..0d1ba99 100644
--- a/src/main/java/com/ibm/bi/dml/runtime/instructions/cp/AppendCPInstruction.java
+++ b/src/main/java/com/ibm/bi/dml/runtime/instructions/cp/AppendCPInstruction.java
@@ -18,7 +18,6 @@
 package com.ibm.bi.dml.runtime.instructions.cp;
 
 import com.ibm.bi.dml.parser.Expression.DataType;
-import com.ibm.bi.dml.parser.Expression.ValueType;
 import com.ibm.bi.dml.runtime.DMLRuntimeException;
 import com.ibm.bi.dml.runtime.DMLUnsupportedOperationException;
 import com.ibm.bi.dml.runtime.controlprogram.context.ExecutionContext;
@@ -31,10 +30,10 @@ import com.ibm.bi.dml.runtime.matrix.operators.ReorgOperator;
 
 
 public class AppendCPInstruction extends BinaryCPInstruction
-{
-	
+{	
 	public enum AppendType{
 		CBIND,
+		RBIND,
 		STRING,
 	}
 
@@ -50,32 +49,27 @@ public class AppendCPInstruction extends BinaryCPInstruction
 	}
 	
 	public static Instruction parseInstruction ( String str ) 
-		throws DMLRuntimeException {
-		CPOperand in1 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
-		CPOperand in2 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
-		CPOperand in3 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
-		CPOperand out = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
-		
-		//4 parts to the instruction besides opcode and execlocation
-		//two input args, one output arg and offset = 4
-		InstructionUtils.checkNumFields ( str, 4 );
-		
+		throws DMLRuntimeException 
+	{
 		String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
+		InstructionUtils.checkNumFields (parts, 5);
+		
 		String opcode = parts[0];
-		in1.split(parts[1]);
-		in2.split(parts[2]);
-		in3.split(parts[3]);
-		out.split(parts[4]);
-		//String offset_str = parts[4];
-		 
-		AppendType type = (in1.getDataType()==DataType.MATRIX) ? AppendType.CBIND : AppendType.STRING;
+		CPOperand in1 = new CPOperand(parts[1]);
+		CPOperand in2 = new CPOperand(parts[2]);
+		CPOperand in3 = new CPOperand(parts[3]);
+		CPOperand out = new CPOperand(parts[4]);
+		boolean cbind = Boolean.parseBoolean(parts[5]);
 		
+		AppendType type = (in1.getDataType()!=DataType.MATRIX) ? AppendType.STRING :
+						  cbind ? AppendType.CBIND : AppendType.RBIND;
 		
 		if(!opcode.equalsIgnoreCase("append"))
 			throw new DMLRuntimeException("Unknown opcode while parsing a AppendCPInstruction: " + str);
-		else
-			return new AppendCPInstruction(new ReorgOperator(OffsetColumnIndex.getOffsetColumnIndexFnObject(-1)), 
-										   in1, in2, in3, out, type, opcode, str);
+
+		return new AppendCPInstruction(
+				new ReorgOperator(OffsetColumnIndex.getOffsetColumnIndexFnObject(-1)), 
+				in1, in2, in3, out, type, opcode, str);
 	}
 	
 	@Override
@@ -89,18 +83,38 @@ public class AppendCPInstruction extends BinaryCPInstruction
 			MatrixBlock matBlock2 = ec.getMatrixInput(input2.getName());
 			
 			//check input dimensions
-			if(matBlock1.getNumRows() != matBlock2.getNumRows())
-				throw new DMLRuntimeException("Append is not possible for input matrices " 
+			if(matBlock1.getNumRows() != matBlock2.getNumRows()) {
+				throw new DMLRuntimeException("Append-cbind is not possible for input matrices " 
 											  + input1.getName() + " and " + input2.getName()
-											  + "with unequal number of rows");
-			
+											  + " with different number of rows");
+			}
+				
 			//execute append operations (append both inputs to initially empty output)
-			MatrixBlock ret = matBlock1.appendOperations(matBlock2, new MatrixBlock());
+			MatrixBlock ret = matBlock1.appendOperations(matBlock2, new MatrixBlock(), true);
 			
-			//set output
+			//set output and release inputs 
 			ec.setMatrixOutput(output.getName(), ret);
+			ec.releaseMatrixInput(input1.getName());
+			ec.releaseMatrixInput(input2.getName());
+		}
+		else if( _type == AppendType.RBIND )
+		{
+			//get inputs
+			MatrixBlock matBlock1 = ec.getMatrixInput(input1.getName());
+			MatrixBlock matBlock2 = ec.getMatrixInput(input2.getName());
+			
+			//check input dimensions
+			if(matBlock1.getNumColumns() != matBlock2.getNumColumns()) {
+				throw new DMLRuntimeException("Append-rbind is not possible for input matrices " 
+											  + input1.getName() + " and " + input2.getName()
+											  + " with different number of columns");
+			}
 			
-			//release inputs 
+			//execute append operations (append both inputs to initially empty output)
+			MatrixBlock ret = matBlock1.appendOperations(matBlock2, new MatrixBlock(), false);
+			
+			//set output and release inputs 
+			ec.setMatrixOutput(output.getName(), ret);
 			ec.releaseMatrixInput(input1.getName());
 			ec.releaseMatrixInput(input2.getName());
 		}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendGInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendGInstruction.java b/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendGInstruction.java
index 4249b6d..b160ab8 100644
--- a/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendGInstruction.java
+++ b/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendGInstruction.java
@@ -33,32 +33,32 @@ import com.ibm.bi.dml.runtime.matrix.operators.Operator;
 
 public class AppendGInstruction extends AppendInstruction 
 {
-	
 	private long _offset = -1; //cols of input1 
 	private long _offset2 = -1; //cols of input2
-	private long _clen = -1;
-	
+	private long _len = -1;
 	
-	public AppendGInstruction(Operator op, byte in1, byte in2, long offset, long offset2, byte out, String istr)
+	public AppendGInstruction(Operator op, byte in1, byte in2, long offset, long offset2, byte out, boolean cbind, String istr)
 	{
-		super(op, in1, in2, out, istr);
+		super(op, in1, in2, out, cbind, istr);
 		_offset = offset;
 		_offset2 = offset2;
-		_clen = _offset + _offset2;
+		_len = _offset + _offset2;
 	}
 
-	public static Instruction parseInstruction ( String str ) throws DMLRuntimeException {
-		InstructionUtils.checkNumFields ( str, 5 );
-		
+	public static Instruction parseInstruction ( String str ) 
+		throws DMLRuntimeException 
+	{
 		String[] parts = InstructionUtils.getInstructionParts ( str );
-		
+		InstructionUtils.checkNumFields (parts, 6);
+			
 		byte in1 = Byte.parseByte(parts[1]);
 		byte in2 = Byte.parseByte(parts[2]);
 		long offset = (long)(Double.parseDouble(parts[3]));
-		long clen = (long)(Double.parseDouble(parts[4]));
+		long len = (long)(Double.parseDouble(parts[4]));
 		byte out = Byte.parseByte(parts[5]);
+		boolean cbind = Boolean.parseBoolean(parts[6]);
 			
-		return new AppendGInstruction(null, in1, in2, offset, clen, out, str);
+		return new AppendGInstruction(null, in1, in2, offset, len, out, cbind, str);
 	}
 	
 	
@@ -67,6 +67,9 @@ public class AppendGInstruction extends AppendInstruction
 			CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int brlen, int bclen)
 			throws DMLUnsupportedOperationException, DMLRuntimeException 
 	{
+		//setup basic meta data
+		int blen = _cbind ? bclen : brlen;
+		
 		//Step 1: handle first input (forward blocks, change dim of last block)
 		ArrayList<IndexedMatrixValue> blkList1 = cachedValues.get(input1);
 		if( blkList1 != null )
@@ -74,21 +77,22 @@ public class AppendGInstruction extends AppendInstruction
 			{
 				if( in1 == null )
 					continue;
-				
-				if( _offset%bclen == 0 ) //special case: forward only
-				{
+
+				if( _offset%blen == 0 ) { //special case: forward only
 					cachedValues.add(output, in1);	
 				}
 				else //general case: change dims and forward
-				{					
+				{	
 					MatrixIndexes tmpix = in1.getIndexes();
 					MatrixBlock tmpval = (MatrixBlock) in1.getValue(); //always block
-					if( _offset/bclen+1 == tmpix.getColumnIndex() ) //border block
+					if( _cbind && _offset/blen+1 == tmpix.getColumnIndex() //border block
+						|| !_cbind && _offset/blen+1 == tmpix.getRowIndex()) 
 					{
 						IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
 						MatrixBlock tmpvalNew = (MatrixBlock)data.getValue(); //always block
-						int cols = Math.min(bclen, (int)(_clen-(tmpix.getColumnIndex()-1)*bclen));
-						tmpvalNew.reset(tmpval.getNumRows(), cols);						
+						int lrlen = _cbind ? tmpval.getNumRows() : Math.min(blen, (int)(_len-(tmpix.getRowIndex()-1)*blen));
+				        int lclen = _cbind ? Math.min(blen, (int)(_len-(tmpix.getColumnIndex()-1)*blen)) : tmpval.getNumColumns();
+						tmpvalNew.reset(lrlen, lclen);
 						tmpvalNew.copy(0, tmpval.getNumRows()-1, 0, tmpval.getNumColumns()-1, tmpval, true);
 						data.getIndexes().setIndexes(tmpix);
 					}
@@ -106,47 +110,80 @@ public class AppendGInstruction extends AppendInstruction
 			{
 				if( in2 == null )
 					continue;
+
+				MatrixIndexes tmpix = in2.getIndexes();
+				MatrixBlock tmpval = (MatrixBlock) in2.getValue(); //always block
 				
 				if( _offset%bclen == 0 ) //special case no split
 				{
 					IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
-					MatrixIndexes ixtmp = data.getIndexes();
-					ixtmp.setIndexes(in2.getIndexes().getRowIndex(), 
-							         _offset/bclen + in2.getIndexes().getColumnIndex());
-					data.set(ixtmp, in2.getValue());
+					MatrixIndexes ix1 = data.getIndexes();
+					long rix = _cbind ? tmpix.getRowIndex() : _offset/blen + tmpix.getRowIndex();
+					long cix = _cbind ? _offset/blen + tmpix.getColumnIndex() : tmpix.getColumnIndex();
+					ix1.setIndexes(rix, cix);
+					data.set(ix1, in2.getValue());
 				}
 				else //general case: split and forward
-				{		
-					MatrixIndexes tmpix = in2.getIndexes();
-					MatrixBlock tmpval = (MatrixBlock) in2.getValue(); //always block
-					
-					//first half
+				{	
 					IndexedMatrixValue data1 = cachedValues.holdPlace(output, valueClass);
 					MatrixIndexes ix1 = data1.getIndexes();
 					MatrixBlock tmpvalNew = (MatrixBlock)data1.getValue(); //always block
-					int cix1 = (int)(_offset/bclen + in2.getIndexes().getColumnIndex());
-					int cols1 = Math.min(bclen, (int)(_clen-(long)(cix1-1)*bclen));
-					ix1.setIndexes( tmpix.getRowIndex(), cix1);
-					tmpvalNew.reset( tmpval.getNumRows(), cols1 );
-					tmpvalNew.copy(0, tmpval.getNumRows()-1, (int)((_offset+1)%bclen)-1, cols1-1, 
-							       tmpval.sliceOperations(0, tmpval.getNumRows()-1, 0, 
-							    		                     (int)(cols1-((_offset)%bclen)-1), new MatrixBlock()), true);
-					data1.getIndexes().setIndexes(ix1);
 					
-					if( cols1-((_offset)%bclen)<tmpval.getNumColumns() ) 
+					if( _cbind )
+					{
+						//first half
+						int cix1 = (int)(_offset/blen + tmpix.getColumnIndex());
+						int cols1 = Math.min(blen, (int)(_len-(long)(cix1-1)*blen));
+						ix1.setIndexes( tmpix.getRowIndex(), cix1);
+						tmpvalNew.reset( tmpval.getNumRows(), cols1 );
+						tmpvalNew.copy(0, tmpval.getNumRows()-1, (int)((_offset+1)%blen)-1, cols1-1, 
+								       tmpval.sliceOperations(0, tmpval.getNumRows()-1, 0, 
+								    		                     (int)(cols1-((_offset)%blen)-1), new MatrixBlock()), true);
+						data1.getIndexes().setIndexes(ix1);
+						
+						if( cols1-((_offset)%blen)<tmpval.getNumColumns() ) 
+						{
+							//second half (if required)
+							IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
+							MatrixIndexes ix2 = data2.getIndexes();
+							MatrixBlock tmpvalNew2 = (MatrixBlock)data2.getValue(); //always block
+							int cix2 = (int)(_offset/blen + 1 + tmpix.getColumnIndex());
+							int cols2 = Math.min(blen, (int)(_len-(long)(cix2-1)*blen));
+							ix2.setIndexes( tmpix.getRowIndex(), cix2);
+							tmpvalNew2.reset( tmpval.getNumRows(), cols2 );
+							tmpvalNew2.copy(0, tmpval.getNumRows()-1, 0, cols2-1, 
+									       tmpval.sliceOperations(0, tmpval.getNumRows()-1, (int)(cols1-((_offset)%blen)), 
+									    		                     tmpval.getNumColumns()-1, new MatrixBlock()), true);
+							data2.getIndexes().setIndexes(ix2);
+						}	
+					}
+					else //rbind
 					{
-						//second half (if required)
-						IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
-						MatrixIndexes ix2 = data2.getIndexes();
-						MatrixBlock tmpvalNew2 = (MatrixBlock)data2.getValue(); //always block
-						int cix2 = (int)(_offset/bclen + 1 + in2.getIndexes().getColumnIndex());
-						int cols2 = Math.min(bclen, (int)(_clen-(long)(cix2-1)*bclen));
-						ix2.setIndexes( tmpix.getRowIndex(), cix2);
-						tmpvalNew2.reset( tmpval.getNumRows(), cols2 );
-						tmpvalNew2.copy(0, tmpval.getNumRows()-1, 0, cols2-1, 
-								       tmpval.sliceOperations(0, tmpval.getNumRows()-1, (int)(cols1-((_offset)%bclen)), 
-								    		                     tmpval.getNumColumns()-1, new MatrixBlock()), true);
-						data2.getIndexes().setIndexes(ix2);
+						//first half
+						int rix1 = (int)(_offset/blen + tmpix.getRowIndex());
+						int rows1 = Math.min(blen, (int)(_len-(long)(rix1-1)*blen));
+						ix1.setIndexes( rix1, tmpix.getColumnIndex());
+						tmpvalNew.reset( rows1, tmpval.getNumColumns() );
+						tmpvalNew.copy((int)((_offset+1)%blen)-1, rows1-1, 0, tmpval.getNumColumns()-1,  
+								       tmpval.sliceOperations(0,(int)(rows1-((_offset)%blen)-1), 
+								    		   0, tmpval.getNumColumns()-1, new MatrixBlock()), true);
+						data1.getIndexes().setIndexes(ix1);
+						
+						if( rows1-((_offset)%blen)<tmpval.getNumRows() ) 
+						{
+							//second half (if required)
+							IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
+							MatrixIndexes ix2 = data2.getIndexes();
+							MatrixBlock tmpvalNew2 = (MatrixBlock)data2.getValue(); //always block
+							int rix2 = (int)(_offset/blen + 1 + tmpix.getRowIndex());
+							int rows2 = Math.min(blen, (int)(_len-(long)(rix2-1)*blen));
+							ix2.setIndexes(rix2, tmpix.getColumnIndex());
+							tmpvalNew2.reset( rows2, tmpval.getNumColumns() );
+							tmpvalNew2.copy(0, rows2-1, 0, tmpval.getNumColumns()-1,  
+									       tmpval.sliceOperations((int)(rows1-((_offset)%blen)), tmpval.getNumRows()-1, 
+									    		   0, tmpval.getNumColumns()-1, new MatrixBlock()), true);
+							data2.getIndexes().setIndexes(ix2);
+						}	
 					}
 				}
 			}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendInstruction.java b/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendInstruction.java
index 3ef737f..41476ee 100644
--- a/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendInstruction.java
+++ b/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendInstruction.java
@@ -30,7 +30,8 @@ import com.ibm.bi.dml.runtime.matrix.operators.Operator;
 
 public class AppendInstruction extends BinaryMRInstructionBase 
 {
-		
+	protected boolean _cbind = true;
+	
 	/**
 	 * 
 	 * @param op
@@ -39,13 +40,18 @@ public class AppendInstruction extends BinaryMRInstructionBase
 	 * @param out
 	 * @param istr
 	 */
-	public AppendInstruction(Operator op, byte in1, byte in2, byte out, String istr)
+	public AppendInstruction(Operator op, byte in1, byte in2, byte out, boolean cbind, String istr)
 	{
 		super(op, in1, in2, out);
 		instString = istr;	
 		mrtype = MRINSTRUCTION_TYPE.Append;
+		_cbind = cbind;
 	}
 
+	public boolean isCBind() {
+		return _cbind;
+	}
+	
 	/**
 	 * 
 	 * @param str

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendMInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendMInstruction.java b/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendMInstruction.java
index 6c9f5a9..43f10a8 100644
--- a/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendMInstruction.java
+++ b/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendMInstruction.java
@@ -38,26 +38,26 @@ public class AppendMInstruction extends AppendInstruction implements IDistribute
 {	
 	private long _offset = -1; 
 	
-	public AppendMInstruction(Operator op, byte in1, byte in2, long offset, CacheType type, byte out, String istr)
+	public AppendMInstruction(Operator op, byte in1, byte in2, long offset, CacheType type, byte out, boolean cbind, String istr)
 	{
-		super(op, in1, in2, out, istr);
+		super(op, in1, in2, out, cbind, istr);
 		_offset = offset;
 	}
 	
 	public static Instruction parseInstruction ( String str ) 
 		throws DMLRuntimeException 
 	{
-		InstructionUtils.checkNumFields ( str, 5 );
-		
 		String[] parts = InstructionUtils.getInstructionParts ( str );
+		InstructionUtils.checkNumFields(parts, 6);
 		
 		byte in1 = Byte.parseByte(parts[1]);
 		byte in2 = Byte.parseByte(parts[2]);
 		long offset = (long)(Double.parseDouble(parts[3]));
 		byte out = Byte.parseByte(parts[4]);
 		CacheType type = CacheType.valueOf(parts[5]);
+		boolean cbind = Boolean.parseBoolean(parts[6]);
 		
-		return new AppendMInstruction(null, in1, in2, offset, type, out, str);
+		return new AppendMInstruction(null, in1, in2, offset, type, out, cbind, str);
 	}
 	
 	@Override //IDistributedCacheConsumer
@@ -89,45 +89,64 @@ public class AppendMInstruction extends AppendInstruction implements IDistribute
 				continue;
 		
 			//check for boundary block
-			long lastBlockColIndex = (long)Math.ceil((double)_offset/blockColFactor);
+			int blen = _cbind ? blockColFactor : blockRowFactor;
+			long lastBlockColIndex = (long)Math.ceil((double)_offset/blen);	
 			
 			//case 1: pass through of non-boundary blocks
-			if( in1.getIndexes().getColumnIndex()!=lastBlockColIndex ) {
+			MatrixIndexes ix = in1.getIndexes();
+			if( (_cbind?ix.getColumnIndex():ix.getRowIndex())!=lastBlockColIndex ) {
 				cachedValues.add(output, in1);
 			}
 			//case 2: pass through full input block and rhs block 
-			else if( in1.getValue().getNumColumns() == blockColFactor ) {
+			else if( _cbind && in1.getValue().getNumColumns() == blen 
+					|| !_cbind && in1.getValue().getNumRows() == blen ) {
 				//output lhs block
 				cachedValues.add(output, in1);
 				
 				//output shallow copy of rhs block
 				DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(input2);
-				IndexedMatrixValue tmp = new IndexedMatrixValue(
-						new MatrixIndexes(in1.getIndexes().getRowIndex(), in1.getIndexes().getColumnIndex()+1),
-						dcInput.getDataBlock((int)in1.getIndexes().getRowIndex(), 1).getValue());
-				cachedValues.add(output, tmp);
+				if( _cbind ) {
+					cachedValues.add(output, new IndexedMatrixValue(
+							new MatrixIndexes(ix.getRowIndex(), ix.getColumnIndex()+1),
+							dcInput.getDataBlock((int)ix.getRowIndex(), 1).getValue()));
+				}
+				else {
+					cachedValues.add(output, new IndexedMatrixValue(
+							new MatrixIndexes(ix.getRowIndex()+1, ix.getColumnIndex()),
+							dcInput.getDataBlock(1, (int)ix.getColumnIndex()).getValue()));	
+				}
 			}
 			//case 3: append operation on boundary block
 			else 
 			{
 				DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(input2);
-				MatrixValue value_in2 = dcInput.getDataBlock((int)in1.getIndexes().getRowIndex(), 1).getValue();
 				
 				//allocate space for the output value
 				ArrayList<IndexedMatrixValue> outlist=new ArrayList<IndexedMatrixValue>(2);
 				IndexedMatrixValue first=cachedValues.holdPlace(output, valueClass);
-				first.getIndexes().setIndexes(in1.getIndexes());
+				first.getIndexes().setIndexes(ix);
 				outlist.add(first);
 				
-				if(in1.getValue().getNumColumns()+value_in2.getNumColumns()>blockColFactor)
-				{
-					IndexedMatrixValue second=cachedValues.holdPlace(output, valueClass);
-					second.getIndexes().setIndexes(in1.getIndexes().getRowIndex(), in1.getIndexes().getColumnIndex()+1);
-					outlist.add(second);
+				MatrixValue value_in2 = null;
+				if( _cbind ) {
+					value_in2 = dcInput.getDataBlock((int)ix.getRowIndex(), 1).getValue();
+					if(in1.getValue().getNumColumns()+value_in2.getNumColumns()>blen) {
+						IndexedMatrixValue second=cachedValues.holdPlace(output, valueClass);
+						second.getIndexes().setIndexes(ix.getRowIndex(), ix.getColumnIndex()+1);
+						outlist.add(second);
+					}
+				}
+				else { //rbind: rhs boundary block is row-block 1 of in2, in the SAME column-block as the lhs block
+					value_in2 = dcInput.getDataBlock(1, (int)ix.getColumnIndex()).getValue();
+					if(in1.getValue().getNumRows()+value_in2.getNumRows()>blen) {
+						IndexedMatrixValue second=cachedValues.holdPlace(output, valueClass);
+						second.getIndexes().setIndexes(ix.getRowIndex()+1, ix.getColumnIndex());
+						outlist.add(second);
+					}
 				}
 	
 				OperationsOnMatrixValues.performAppend(in1.getValue(), value_in2, outlist, 
-					blockRowFactor, blockColFactor, true, 0);			
+					blockRowFactor, blockColFactor, _cbind, true, 0);			
 			}
 		}
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendRInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendRInstruction.java b/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendRInstruction.java
index 156a442..5429b1d 100644
--- a/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendRInstruction.java
+++ b/src/main/java/com/ibm/bi/dml/runtime/instructions/mr/AppendRInstruction.java
@@ -29,22 +29,23 @@ import com.ibm.bi.dml.runtime.matrix.operators.Operator;
 
 public class AppendRInstruction extends AppendInstruction 
 {
-	
-	public AppendRInstruction(Operator op, byte in1, byte in2, byte out, String istr)
+	public AppendRInstruction(Operator op, byte in1, byte in2, byte out, boolean cbind, String istr)
 	{
-		super(op, in1, in2, out, istr);
+		super(op, in1, in2, out, cbind, istr);
 	}
 
-	public static Instruction parseInstruction ( String str ) throws DMLRuntimeException {
-		InstructionUtils.checkNumFields ( str, 3 );
-		
+	public static Instruction parseInstruction ( String str ) 
+		throws DMLRuntimeException 
+	{
 		String[] parts = InstructionUtils.getInstructionParts ( str );
+		InstructionUtils.checkNumFields(parts, 5);
 		
 		byte in1 = Byte.parseByte(parts[1]);
 		byte in2 = Byte.parseByte(parts[2]);
 		byte out = Byte.parseByte(parts[3]);
+		boolean cbind = Boolean.parseBoolean(parts[4]);
 			
-		return new AppendRInstruction(null, in1, in2, out, str);
+		return new AppendRInstruction(null, in1, in2, out, cbind, str);
 	}
 	
 	
@@ -65,7 +66,7 @@ public class AppendRInstruction extends AppendInstruction
 		MatrixBlock mbLeft = (MatrixBlock)left.getValue();
 		MatrixBlock mbRight = (MatrixBlock)right.getValue();
 		
-		MatrixBlock ret = mbLeft.appendOperations(mbRight, new MatrixBlock());
+		MatrixBlock ret = mbLeft.appendOperations(mbRight, new MatrixBlock(), _cbind);
 		
 		//put result into cache
 		cachedValues.add(output, new IndexedMatrixValue(left.getIndexes(), ret));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1043f433/src/main/java/com/ibm/bi/dml/runtime/instructions/spark/AppendGAlignedSPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/ibm/bi/dml/runtime/instructions/spark/AppendGAlignedSPInstruction.java b/src/main/java/com/ibm/bi/dml/runtime/instructions/spark/AppendGAlignedSPInstruction.java
index 5d85914..67dc07e 100644
--- a/src/main/java/com/ibm/bi/dml/runtime/instructions/spark/AppendGAlignedSPInstruction.java
+++ b/src/main/java/com/ibm/bi/dml/runtime/instructions/spark/AppendGAlignedSPInstruction.java
@@ -38,32 +38,34 @@ import com.ibm.bi.dml.runtime.matrix.operators.ReorgOperator;
 
 public class AppendGAlignedSPInstruction extends BinarySPInstruction
 {
+	private boolean _cbind = true;
 	
-	public AppendGAlignedSPInstruction(Operator op, CPOperand in1, CPOperand in2, CPOperand in3, CPOperand out, String opcode, String istr)
+	public AppendGAlignedSPInstruction(Operator op, CPOperand in1, CPOperand in2, CPOperand in3, CPOperand out, boolean cbind, String opcode, String istr)
 	{
 		super(op, in1, in2, out, opcode, istr);
 		_sptype = SPINSTRUCTION_TYPE.GAppend;
+		_cbind = cbind;
 	}
 	
 	public static Instruction parseInstruction ( String str ) 
 		throws DMLRuntimeException
 	{
-		//4 parts to the instruction besides opcode and execlocation
-		//two input args, one output arg and offset = 4
-		InstructionUtils.checkNumFields ( str, 4 );
-		
 		String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
+		InstructionUtils.checkNumFields (parts, 5);
+		
 		String opcode = parts[0];
 		CPOperand in1 = new CPOperand(parts[1]);
 		CPOperand in2 = new CPOperand(parts[2]);
 		CPOperand in3 = new CPOperand(parts[3]);
 		CPOperand out = new CPOperand(parts[4]);
+		boolean cbind = Boolean.parseBoolean(parts[5]);
 		
 		if(!opcode.equalsIgnoreCase("galignedappend"))
 			throw new DMLRuntimeException("Unknown opcode while parsing a AppendGSPInstruction: " + str);
-		else
-			return new AppendGAlignedSPInstruction(new ReorgOperator(OffsetColumnIndex.getOffsetColumnIndexFnObject(-1)), 
-										   in1, in2, in3, out, opcode, str);
+		
+		return new AppendGAlignedSPInstruction(
+				new ReorgOperator(OffsetColumnIndex.getOffsetColumnIndexFnObject(-1)), 
+				in1, in2, in3, out, cbind, opcode, str);
 	}
 	
 	@Override
@@ -71,38 +73,21 @@ public class AppendGAlignedSPInstruction extends BinarySPInstruction
 		throws DMLUnsupportedOperationException, DMLRuntimeException 
 	{
 		// general case append (map-extend, aggregate)
-
 		SparkExecutionContext sec = (SparkExecutionContext)ec;
+		checkBinaryAppendInputCharacteristics(sec, _cbind, false, true);
 		MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
-		MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
-		
-		if(!mc1.dimsKnown() || !mc2.dimsKnown()) {
-			throw new DMLRuntimeException("The dimensions unknown for inputs");
-		}
-		else if(mc1.getRows() != mc2.getRows()) {
-			throw new DMLRuntimeException("The number of rows of inputs should match for append instruction");
-		}
-		else if(mc1.getRowsPerBlock() != mc2.getRowsPerBlock() || mc1.getColsPerBlock() != mc2.getColsPerBlock()) {
-			throw new DMLRuntimeException("The block sizes donot match for input matrices");
-		}
 		
 		JavaPairRDD<MatrixIndexes,MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable( input1.getName() );
 		JavaPairRDD<MatrixIndexes,MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable( input2.getName() );
 		JavaPairRDD<MatrixIndexes,MatrixBlock> out = null;
 		
-		if(mc1.getCols() % mc1.getColsPerBlock() == 0) {
-			// Simple changing of matrix indexes of RHS
-			long shiftBy = (long) Math.ceil((double)mc1.getCols() / mc1.getColsPerBlock());
-			out = in1.union(
-						in2.mapToPair(new ShiftColumnIndex(shiftBy))
-					);
-		}
-		else {
-			throw new DMLRuntimeException("Incorrect append instruction when mc1.getCols() % mc1.getColsPerBlock() != 0. Should have used AppendGSP");
-		}
+		// Simple changing of matrix indexes of RHS
+		long shiftBy = _cbind ? mc1.getNumColBlocks() : mc1.getNumRowBlocks();		
+		out = in2.mapToPair(new ShiftColumnIndex(shiftBy, _cbind));
+		out = in1.union( out );
 		
 		//put output RDD handle into symbol table
-		updateBinaryAppendOutputMatrixCharacteristics(sec);
+		updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
 		sec.setRDDHandleForVariable(output.getName(), out);
 		sec.addLineageRDD(output.getName(), input1.getName());
 		sec.addLineageRDD(output.getName(), input2.getName());
@@ -116,16 +101,20 @@ public class AppendGAlignedSPInstruction extends BinarySPInstruction
 		private static final long serialVersionUID = -5185023611319654242L;
 		
 		private long _shiftBy;
+		private boolean _cbind;
 		
-		public ShiftColumnIndex(long shiftBy) {
+		public ShiftColumnIndex(long shiftBy, boolean cbind) {
 			_shiftBy = shiftBy;
+			_cbind = cbind;
 		}
 
 		@Override
 		public Tuple2<MatrixIndexes, MatrixBlock> call(Tuple2<MatrixIndexes, MatrixBlock> kv) 
 			throws Exception 
 		{	
-			return new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(kv._1.getRowIndex(), kv._1.getColumnIndex()+_shiftBy), kv._2);
+			long rix = _cbind ? kv._1.getRowIndex() : kv._1.getRowIndex() + _shiftBy;
+			long cix = _cbind ? kv._1.getColumnIndex() + _shiftBy : kv._1.getColumnIndex();			
+			return new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(rix, cix), kv._2);
 		}
 	}
 }
\ No newline at end of file