You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2017/02/04 18:24:52 UTC

incubator-systemml git commit: [SYSTEMML-527] Write function description parameter

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 578e595fd -> e2492fb61


[SYSTEMML-527] Write function description parameter

Add a description parameter to the write function to allow writing a
description to the metadata. Additionally, set the author to the user name
if available, and add a created timestamp.

Closes #364.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e2492fb6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e2492fb6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e2492fb6

Branch: refs/heads/master
Commit: e2492fb61665fe55a0ccee34116c10bf84f38fbf
Parents: 578e595
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Sat Feb 4 10:19:11 2017 -0800
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Sat Feb 4 10:19:11 2017 -0800

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/lops/Data.java   | 14 ++++++++-
 .../org/apache/sysml/parser/DataExpression.java |  7 ++++-
 .../apache/sysml/parser/OutputStatement.java    |  3 +-
 .../instructions/cp/VariableCPInstruction.java  | 18 +++++++++---
 .../instructions/spark/WriteSPInstruction.java  | 15 +++++++++-
 .../matrix/data/FileFormatProperties.java       | 14 +++++++--
 .../sysml/runtime/util/MapReduceTool.java       | 31 ++++++++++++++++----
 7 files changed, 86 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/lops/Data.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/Data.java b/src/main/java/org/apache/sysml/lops/Data.java
index 2cf125e..8e723c4 100644
--- a/src/main/java/org/apache/sysml/lops/Data.java
+++ b/src/main/java/org/apache/sysml/lops/Data.java
@@ -494,7 +494,19 @@ public class Data extends Lop
 			}
 			
 		}
-		
+
+		if (operation == OperationTypes.WRITE) {
+			sb.append(OPERAND_DELIMITOR);
+			Lop descriptionLop = getInputParams().get(DataExpression.DESCRIPTIONPARAM);
+			if (descriptionLop != null) {
+				boolean descLiteral = (descriptionLop instanceof Data && ((Data) descriptionLop).isLiteral());
+				sb.append(prepOperand(descriptionLop.getOutputParameters().getLabel(), DataType.SCALAR,
+						ValueType.STRING, descLiteral));
+			} else {
+				sb.append(prepOperand("", DataType.SCALAR, ValueType.STRING, true));
+			}
+		}
+
 		return sb.toString();
 	}
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/parser/DataExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DataExpression.java b/src/main/java/org/apache/sysml/parser/DataExpression.java
index cd9a862..9370bdd 100644
--- a/src/main/java/org/apache/sysml/parser/DataExpression.java
+++ b/src/main/java/org/apache/sysml/parser/DataExpression.java
@@ -78,6 +78,7 @@ public class DataExpression extends DataIdentifier
 	public static final String DESCRIPTIONPARAM = "description";
 	public static final String AUTHORPARAM = "author";
 	public static final String SCHEMAPARAM = "schema";
+	public static final String CREATEDPARAM = "created";
 
 	// Parameter names relevant to reading/writing delimited/csv files
 	public static final String DELIM_DELIMITER = "sep";
@@ -100,6 +101,7 @@ public class DataExpression extends DataIdentifier
 	public static final String[] READ_VALID_MTD_PARAM_NAMES = 
 		{ IO_FILENAME, READROWPARAM, READCOLPARAM, READNUMNONZEROPARAM, FORMAT_TYPE,
 			ROWBLOCKCOUNTPARAM, COLUMNBLOCKCOUNTPARAM, DATATYPEPARAM, VALUETYPEPARAM, SCHEMAPARAM, DESCRIPTIONPARAM,
+			AUTHORPARAM, CREATEDPARAM,
 			// Parameters related to delimited/csv files.
 			DELIM_FILL_VALUE, DELIM_DELIMITER, DELIM_FILL, DELIM_HAS_HEADER_ROW, DELIM_NA_STRINGS
 		}; 
@@ -1836,7 +1838,10 @@ public class DataExpression extends DataIdentifier
 			{
 				// if the read method does not specify parameter value, then add MTD metadata file value to parameter list
 				if (getVarParam(key.toString()) == null){
-					if ( !key.toString().equalsIgnoreCase(DESCRIPTIONPARAM) ) {
+					if (( !key.toString().equalsIgnoreCase(DESCRIPTIONPARAM) ) &&
+							( !key.toString().equalsIgnoreCase(AUTHORPARAM) ) &&
+							( !key.toString().equalsIgnoreCase(CREATEDPARAM) ) )
+					{
 						StringIdentifier strId = new StringIdentifier(val.toString(),
 								this.getFilename(), this.getBeginLine(), this.getBeginColumn(), 
 								this.getEndLine(), this.getEndColumn());

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/parser/OutputStatement.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/OutputStatement.java b/src/main/java/org/apache/sysml/parser/OutputStatement.java
index d42432c..00859be 100644
--- a/src/main/java/org/apache/sysml/parser/OutputStatement.java
+++ b/src/main/java/org/apache/sysml/parser/OutputStatement.java
@@ -35,7 +35,8 @@ public class OutputStatement extends Statement
 																DataExpression.FORMAT_TYPE, 
 																DataExpression.DELIM_DELIMITER, 
 																DataExpression.DELIM_HAS_HEADER_ROW, 
-																DataExpression.DELIM_SPARSE};
+																DataExpression.DELIM_SPARSE,
+																DataExpression.DESCRIPTIONPARAM};
 
 	public DataIdentifier getIdentifier(){
 		return _id;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
index d5ce3f7..78fe330 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
@@ -101,6 +101,7 @@ public class VariableCPInstruction extends CPInstruction
 	private CPOperand input1;
 	private CPOperand input2;
 	private CPOperand input3;
+	private CPOperand input4;
 	private CPOperand output;
 	private MetaData metadata;
 	private UpdateType _updateType;
@@ -274,15 +275,15 @@ public class VariableCPInstruction extends CPInstruction
 		else if ( voc == VariableOperationCode.Write ) {
 			// All write instructions have 3 parameters, except in case of delimited/csv file.
 			// Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse)
-			if ( parts.length != 4 && parts.length != 7 )
-				throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
+			if ( parts.length != 5 && parts.length != 8 )
+				throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
 		}
 		else {
 			_arity = getArity(voc);
 			InstructionUtils.checkNumFields ( parts, _arity ); // no output
 		}
 		
-		CPOperand in1=null, in2=null, in3=null, out=null;
+		CPOperand in1=null, in2=null, in3=null, in4=null, out=null;
 		
 		switch (voc) {
 		
@@ -413,6 +414,13 @@ public class VariableCPInstruction extends CPInstruction
 				boolean sparse = Boolean.parseBoolean(parts[6]);
 				FileFormatProperties formatProperties = new CSVFileFormatProperties(hasHeader, delim, sparse);
 				inst.setFormatProperties(formatProperties);
+				in4 = new CPOperand(parts[7]); // description
+				inst.input4 = in4;
+			} else {
+				FileFormatProperties ffp = new FileFormatProperties();
+				inst.setFormatProperties(ffp);
+				in4 = new CPOperand(parts[4]); // description
+				inst.input4 = in4;
 			}
 			return inst;
 			
@@ -745,6 +753,8 @@ public class VariableCPInstruction extends CPInstruction
 	{
 		//get filename (literal or variable expression)
 		String fname = ec.getScalarInput(input2.getName(), ValueType.STRING, input2.isLiteral()).getStringValue();
+		String desc = ec.getScalarInput(input4.getName(), ValueType.STRING, input4.isLiteral()).getStringValue();
+		_formatProperties.setDescription(desc);
 		
 		if( input1.getDataType() == DataType.SCALAR ) {
 			writeScalarToHDFS(ec, fname);
@@ -758,7 +768,7 @@ public class VariableCPInstruction extends CPInstruction
 			else {
 				// Default behavior
 				MatrixObject mo = ec.getMatrixObject(input1.getName());
-				mo.exportData(fname, outFmt);
+				mo.exportData(fname, outFmt, _formatProperties);
 			}
 		}
 		else if( input1.getDataType() == DataType.FRAME ) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
index 431ff24..912dbe3 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
@@ -55,6 +55,7 @@ public class WriteSPInstruction extends SPInstruction
 	private CPOperand input1 = null; 
 	private CPOperand input2 = null;
 	private CPOperand input3 = null;
+	private CPOperand input4 = null;
 	private FileFormatProperties formatProperties;
 	
 	//scalars might occur for transform
@@ -82,7 +83,7 @@ public class WriteSPInstruction extends SPInstruction
 		
 		// All write instructions have 3 parameters, except in case of delimited/csv file.
 		// Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse)
-		if ( parts.length != 4 && parts.length != 8 ) {
+		if ( parts.length != 5 && parts.length != 9 ) {
 			throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
 		}
 		
@@ -103,6 +104,15 @@ public class WriteSPInstruction extends SPInstruction
 			
 			boolean isInputMB = Boolean.parseBoolean(parts[7]);
 			inst.setInputMatrixBlock(isInputMB);
+
+			CPOperand in4 = new CPOperand(parts[8]);
+			inst.input4 = in4;
+		} else {
+			FileFormatProperties ffp = new FileFormatProperties();
+
+			CPOperand in4 = new CPOperand(parts[4]);
+			inst.input4 = in4;
+			inst.setFormatProperties(ffp);
 		}
 		return inst;		
 	}
@@ -132,6 +142,9 @@ public class WriteSPInstruction extends SPInstruction
 
 		//get filename (literal or variable expression)
 		String fname = ec.getScalarInput(input2.getName(), ValueType.STRING, input2.isLiteral()).getStringValue();
+		String desc = ec.getScalarInput(input4.getName(), ValueType.STRING, input4.isLiteral()).getStringValue();
+		formatProperties.setDescription(desc);
+
 		ValueType[] schema = (input1.getDataType()==DataType.FRAME) ? 
 				sec.getFrameObject(input1.getName()).getSchema() : null;
 		

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
index b782d7b..2f405da 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
@@ -22,16 +22,17 @@ package org.apache.sysml.runtime.matrix.data;
 public class FileFormatProperties 
 {
 	
+	private String description;
 	
 	public enum FileFormat { CSV, NATIVE };
 	
 	FileFormat fmt;
 	
-	FileFormatProperties() {
+	public FileFormatProperties() {
 		fmt = FileFormat.NATIVE;
 	}
 	
-	FileFormatProperties(FileFormat fmt) {
+	public FileFormatProperties(FileFormat fmt) {
 		this.fmt = fmt;
 	}
 	
@@ -42,4 +43,13 @@ public class FileFormatProperties
 	public FileFormat getFileFormat() {
 		return fmt;
 	}
+
+	public String getDescription() {
+		return description;
+	}
+
+	public void setDescription(String description) {
+		this.description = description;
+	}
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
index 6f083f7..ff121b6 100644
--- a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
+++ b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
@@ -25,7 +25,11 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
+import java.text.SimpleDateFormat;
+import java.util.Date;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -386,7 +390,7 @@ public class MapReduceTool
 		BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
 
 		try {
-			String mtd = metaDataToString(mtdfile, vt, schema, dt, mc, outinfo, formatProperties);
+			String mtd = metaDataToString(vt, schema, dt, mc, outinfo, formatProperties);
 			br.write(mtd);
 			br.close();
 		} catch (Exception e) {
@@ -402,8 +406,7 @@ public class MapReduceTool
 		BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
 
 		try {
-			String mtd = metaDataToString(mtdfile, vt, null, 
-				DataType.SCALAR, null, OutputInfo.TextCellOutputInfo, null);
+			String mtd = metaDataToString(vt, null, DataType.SCALAR, null, OutputInfo.TextCellOutputInfo, null);
 			br.write(mtd);
 			br.close();
 		} 
@@ -412,7 +415,7 @@ public class MapReduceTool
 		}
 	}
 
-	public static String metaDataToString(String mtdfile, ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc, 
+	public static String metaDataToString(ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc,
 			OutputInfo outinfo, FileFormatProperties formatProperties) throws JSONException, DMLRuntimeException
 	{
 		OrderedJSONObject mtd = new OrderedJSONObject(); // maintain order in output file
@@ -456,8 +459,24 @@ public class MapReduceTool
 			mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, csvProperties.hasHeader());
 			mtd.put(DataExpression.DELIM_DELIMITER, csvProperties.getDelim());
 		}
-		mtd.put(DataExpression.DESCRIPTIONPARAM,
-			new OrderedJSONObject().put(DataExpression.AUTHORPARAM, "SystemML"));
+
+		if (formatProperties != null) {
+			String description = formatProperties.getDescription();
+			if (StringUtils.isNotEmpty(description)) {
+				String jsonDescription = StringEscapeUtils.escapeJson(description);
+				mtd.put(DataExpression.DESCRIPTIONPARAM, jsonDescription);
+			}
+		}
+
+		String userName = System.getProperty("user.name");
+		if (StringUtils.isNotEmpty(userName)) {
+			mtd.put(DataExpression.AUTHORPARAM, userName);
+		} else {
+			mtd.put(DataExpression.AUTHORPARAM, "SystemML");
+		}
+
+		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z");
+		mtd.put(DataExpression.CREATEDPARAM, sdf.format(new Date()));
 
 		return mtd.toString(4); // indent with 4 spaces	
 	}