You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/07/13 21:12:16 UTC
systemml git commit: [HOTFIX] [SYSTEMML-540] Bugfix in validation of convolution operations.

Repository: systemml
Updated Branches:
  refs/heads/master 8707ff1d4 -> f046051d4


[HOTFIX] [SYSTEMML-540] Bugfix in validation of convolution operations.

- Also, updated the DML documentation as per Prithvi's recommendation.
- This hotfix also contains minor bugfix in getP, getQ methods of
  ConvolutionUtils and also ConvolutionParameter wrapper class.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/f046051d
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/f046051d
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/f046051d

Branch: refs/heads/master
Commit: f046051d43478a8e850092b4303a85d1a86f5dbb
Parents: 8707ff1
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Thu Jul 13 13:12:09 2017 -0800
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Thu Jul 13 14:12:09 2017 -0700

----------------------------------------------------------------------
 docs/dml-language-reference.md                  | 18 ++--
 .../sysml/parser/BuiltinFunctionExpression.java | 88 ++++++++++----------
 .../matrix/data/ConvolutionParameters.java      | 10 ++-
 .../sysml/runtime/util/ConvolutionUtils.java    | 17 ++--
 4 files changed, 65 insertions(+), 68 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/f046051d/docs/dml-language-reference.md
----------------------------------------------------------------------
diff --git a/docs/dml-language-reference.md b/docs/dml-language-reference.md
index ad90ca1..26b02eb 100644
--- a/docs/dml-language-reference.md
+++ b/docs/dml-language-reference.md
@@ -1508,15 +1508,15 @@ The images are assumed to be stored NCHW format, where N = batch size, C = #chan
 Hence, the images are internally represented as a matrix with dimension (N, C * H * W).
 
 
-| Function name          | Input matrices | Input Parameters                                                                                                                                                                            | Notes                                                                                    |
-|------------------------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
-| conv2d                 | input, filter  | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], filter_shape=[numFilters, numChannels, height_filter, width_filter] | Performs 2D convolution operation                                                        |
-| conv2d_backward_filter | input, dout    | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], filter_shape=[numFilters, numChannels, height_filter, width_filter] | Computes the gradients wrt filter of 2D convolution                                      |
-| conv2d_backward_data   | filter, dout   | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], filter_shape=[numFilters, numChannels, height_filter, width_filter] | Computes the gradients wrt input of 2D convolution                                       |
-| max_pool               | input          | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], pool_size=[height_pool, width_pool]                                 | Performs max pooling operation                                                           |
-| max_pool_backward      | input, dout    | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], pool_size=[height_pool, width_pool]                                 | Computes the gradients wrt input of 2D maxpooling                                        |
-| bias_add               | input, bias    |                                                                                                                                                                                             | Adds the bias (row vector of size numChannels) to input with the given numChannels       |
-| bias_multiply          | input, bias    |                                                                                                                                                                                             | Multiplies the bias (row vector of size numChannels) to input with the given numChannels |
+| Function name              | Input matrices | Dimension of first input matrix                           | Dimension of second input matrix (if applicable)          | Dimension of output matrix                                 | Input Parameters                                                                                                                                                                              | Notes                                                                                                                                             |
+|----------------------------|----------------|-----------------------------------------------------------|-----------------------------------------------------------|------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------|
+| conv2d                     | input, filter  | [batch_size X num_channels* height_image* width_image]    | [num_filters X num_channels* height_filter* width_filter] | [batch_size X num_channels_out* height_out* width_out]     | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], filter_shape=[num_filters, num_channels, height_filter, width_filter] | Performs 2D convolution operation                                                                                                                 |
+| conv2d_backward_filter     | input, dout    | [batch_size X num_channels* height_image* width_image]    | [batch_size X num_channels_out* height_out* width_out]    | [num_filters X num_channels* height_filter* width_filter]  | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], filter_shape=[num_filters, num_channels, height_filter, width_filter] | Computes the gradients wrt filter of 2D convolution                                                                                               |
+| conv2d_backward_data       | filter, dout   | [num_filters X num_channels* height_filter* width_filter] | [batch_size X num_channels_out* height_out* width_out]    | [batch_size X num_channels* height_image* width_image]     | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], filter_shape=[num_filters, num_channels, height_filter, width_filter] | Computes the gradients wrt input of 2D convolution                                                                                                |
+| max_pool                   | input          | [batch_size X num_channels* height_image* width_image]    |                                                           | [batch_size X num_channels* height_out* width_out]         | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], pool_size=[height_pool, width_pool]                                   | Performs max pooling operation                                                                                                                    |
+| max_pool_backward          | input, dout    | [batch_size X num_channels* height_image* width_image]    | [batch_size X num_channels* height_out* width_out]        | [batch_size X num_channels* height_image* width_image]     | stride=[stride_h, stride_w], padding=[pad_h, pad_w], input_shape=[batch_size, num_channels, height_image, width_image], pool_size=[height_pool, width_pool]                                   | Computes the gradients wrt input of 2D maxpooling                                                                                                 |
+| bias_add                   | input, bias    | [batch_size X num_channels* height_image* width_image]    | [num_channels X 1]                                        | [batch_size X num_channels* height_image* width_image]     |                                                                                                                                                                                               | Adds the bias (row vector of size num_channels) to input with the given num_channels                                                              |
+| bias_multiply              | input, bias    | [batch_size X num_channels* height_image* width_image]    | [num_channels X 1]                                        | [batch_size X num_channels* height_image* width_image]     |                                                                                                                                                                                               | Multiplies the bias (row vector of size num_channels) to input with the given num_channels                                                        |
 
 
 Examples:

http://git-wip-us.apache.org/repos/asf/systemml/blob/f046051d/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
index c906583..58760bc 100644
--- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
@@ -1116,61 +1116,57 @@ public class BuiltinFunctionExpression extends DataIdentifier
 			// conv2d_backward_filter and conv2d_backward_data
 			Expression input = _args[0]; // For conv2d_backward_filter, this is input and for conv2d_backward_data, this is filter
 			
-			Expression filter = null;
+			Expression input2 = null;
 			if(!(this.getOpCode() == BuiltinFunctionOp.MAX_POOL || this.getOpCode() == BuiltinFunctionOp.AVG_POOL)) {
-				filter = _args[1];			// For conv2d_backward functions, this is dout
-				checkMatrixParam(filter);
+				input2 = _args[1];			// For conv2d_backward functions, this is dout
+				checkMatrixParam(input2);
 			}
 			output.setDataType(DataType.MATRIX);
 			output.setValueType(ValueType.DOUBLE);
 			output.setBlockDimensions(input.getOutput().getRowsInBlock(), input.getOutput().getColumnsInBlock());
 			// stride1, stride2, padding1, padding2, numImg, numChannels, imgSize, imgSize, 
  			// filter_shape1=1, filter_shape2=1, filterSize/poolSize1, filterSize/poolSize1
- 			if( getOpCode() == BuiltinFunctionOp.MAX_POOL_BACKWARD ) {
- 				output.setDimensions(input.getOutput().getDim1(), input.getOutput().getDim2());
- 			}
-			else if( getOpCode() == BuiltinFunctionOp.CONV2D_BACKWARD_DATA ) {
-				//args[0] .. filter, args[1] .. input
-				output.setDimensions(_args[1].getOutput().getDim1(), -1);
+			try {
+				int start = 2;
+				if(!(this.getOpCode() == BuiltinFunctionOp.MAX_POOL || this.getOpCode() == BuiltinFunctionOp.AVG_POOL)) {
+					start = 1;
+				}
+				long stride_h = (long) getDoubleValue(_args[start++]);
+				long stride_w = (long) getDoubleValue(_args[start++]);
+				long pad_h = (long) getDoubleValue(_args[start++]);
+				long pad_w = (long) getDoubleValue(_args[start++]); 
+				long N = (long) getDoubleValue(_args[start++]);
+				long C = (long) getDoubleValue(_args[start++]);
+				long H = (long) getDoubleValue(_args[start++]);
+				long W = (long) getDoubleValue(_args[start++]);
+				long K = -1;
+				if(!(this.getOpCode() == BuiltinFunctionOp.MAX_POOL || this.getOpCode() == BuiltinFunctionOp.AVG_POOL)) {
+					K = (long) getDoubleValue(_args[start]);
+				}
+				start++; start++; // Increment index for K and C
+				long R = (long) getDoubleValue(_args[start++]);
+				long S = (long) getDoubleValue(_args[start++]);
+				long P = ConvolutionUtils.getP(H, R, stride_h, pad_h);
+				long Q = ConvolutionUtils.getP(W, S, stride_w, pad_w);
+				if(this.getOpCode() == BuiltinFunctionOp.CONV2D) 
+					output.setDimensions(N, K*P*Q);
+				else if(this.getOpCode() == BuiltinFunctionOp.CONV2D_BACKWARD_FILTER)
+					output.setDimensions(K, C*R*S);
+				else if(this.getOpCode() == BuiltinFunctionOp.CONV2D_BACKWARD_DATA)
+					output.setDimensions(N, C*H*W);
+				else if(this.getOpCode() == BuiltinFunctionOp.MAX_POOL)
+					output.setDimensions(N, C*P*Q);
+				else if(this.getOpCode() == BuiltinFunctionOp.MAX_POOL_BACKWARD)
+					output.setDimensions(N, C*H*W);
+				else
+					throw new LanguageException("");
+			}
+			catch(Exception e) {
+				output.setDimensions(input.getOutput().getDim1(), -1); // To make sure that output dimensions are not incorrect
 			}
- 			else if(this.getOpCode() == BuiltinFunctionOp.CONV2D_BACKWARD_FILTER) {
- 				output.setDimensions(filter.getOutput().getDim1(), filter.getOutput().getDim2());
- 			}
- 			else if(this.getOpCode() == BuiltinFunctionOp.CONV2D || this.getOpCode() == BuiltinFunctionOp.MAX_POOL) {
- 				try {
- 					int start = 1;
- 					if(this.getOpCode() == BuiltinFunctionOp.CONV2D) {
- 						start = 2;
- 					}
- 					long stride_h = (long) getDoubleValue(_args[start++]);
- 					long stride_w = (long) getDoubleValue(_args[start++]);
- 					long pad_h = (long) getDoubleValue(_args[start++]);
- 					long pad_w = (long) getDoubleValue(_args[start++]); 
- 					start++;
- 					long C = (long) getDoubleValue(_args[start++]);
- 					long H = (long) getDoubleValue(_args[start++]);
- 					long W = (long) getDoubleValue(_args[start++]);
- 					long K = -1;
- 					if(this.getOpCode() == BuiltinFunctionOp.CONV2D) {
- 						K = (long) getDoubleValue(_args[start]);
- 					}
- 					start++; start++;
- 					long R = (long) getDoubleValue(_args[start++]);
- 					long S = (long) getDoubleValue(_args[start++]);
- 					long P = ConvolutionUtils.getP(H, R, stride_h, pad_h);
- 					long Q = ConvolutionUtils.getP(W, S, stride_w, pad_w);
- 					if(this.getOpCode() == BuiltinFunctionOp.CONV2D)
- 						output.setDimensions(input.getOutput().getDim1(), K*P*Q);
- 					else
- 						output.setDimensions(input.getOutput().getDim1(), C*P*Q);
- 				}
- 				catch(Exception e) {
- 					output.setDimensions(input.getOutput().getDim1(), -1); // To make sure that output dimensions are not incorrect
- 				}
- 			}
- 			else
- 				throw new LanguageException("Unsupported op: " + this.getOpCode());
 			checkMatrixParam(input);
+			if(input2 != null)
+				checkMatrixParam(input2);
 			break;
 		}
 		default:

http://git-wip-us.apache.org/repos/asf/systemml/blob/f046051d/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
index a24a736..78b6e3b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
@@ -103,8 +103,14 @@ public class ConvolutionParameters implements Serializable {
 		this.stride_w = stride_w;
 		this.pad_h = pad_h;
 		this.pad_w = pad_w;
-		P = (int) ConvolutionUtils.getP(H, R, stride_h, pad_h);
-		Q = (int) ConvolutionUtils.getQ(W, S, stride_w, pad_w);
+		if(H <= 0 || R <= 0 || stride_h < 0 || pad_h < 0)
+			P = -1;
+		else
+			P = (int) ConvolutionUtils.getP(H, R, stride_h, pad_h);
+		if(W <= 0 || S <= 0 || stride_w < 0 || pad_w < 0)
+			Q = -1;
+		else
+			Q = (int) ConvolutionUtils.getQ(W, S, stride_w, pad_w);
 		this.numThreads = numThreads;
 	}
 	

http://git-wip-us.apache.org/repos/asf/systemml/blob/f046051d/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java
index 8e51405..76f4c34 100644
--- a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java
@@ -44,21 +44,16 @@ public class ConvolutionUtils {
 	}
 	
 	public static long getP(long H, long R, long verticalStride, long heightPadding) {
-		long ret = (H + 2 * heightPadding - R) / verticalStride + 1;
-		if(ret <= 0) {
-			throw new RuntimeException("Incorrect output patch size: "
-					+ "(image_height + 2 * pad_h - filter_height) / verticalStride + 1) needs to be positive, but is " + ret
-					+ " (" + H + " + 2 * " + heightPadding + " - " + R + ") / " + verticalStride + " + 1))");
+		if(H <= 0 || R <= 0 || heightPadding < 0 || verticalStride < 0) {
+			throw new RuntimeException("Incorrect parameters: height=" + H + " filter_height=" + R + " stride=" + verticalStride + " pad=" + heightPadding);
 		}
-		return ret;
+		return (H + 2 * heightPadding - R) / verticalStride + 1;
 	}
 	public static long getQ(long W, long S, long horizontalStride, long widthPadding) {
-		long ret = (W + 2 * widthPadding - S) / horizontalStride + 1;
-		if(ret <= 0) {
-			throw new RuntimeException("Incorrect output patch size: (image_width + 2 * pad_w - filter_width) / horizontalStride + 1) needs to be positive, but is " + ret
-					+ " (" + W + " + 2 * " + widthPadding + " - " + S + ") / " + horizontalStride + " + 1))");
+		if(W <= 0 || S <= 0 || widthPadding < 0 || horizontalStride < 0) {
+			throw new RuntimeException("Incorrect parameters: width=" + W + " filter_width=" + S + " stride=" + horizontalStride + " pad=" + widthPadding);
 		}
-		return ret;
+		return (W + 2 * widthPadding - S) / horizontalStride + 1;
 	}