You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2017/02/04 18:24:52 UTC
incubator-systemml git commit: [SYSTEMML-527] Write function
description parameter
Repository: incubator-systemml
Updated Branches:
refs/heads/master 578e595fd -> e2492fb61
[SYSTEMML-527] Write function description parameter
Add a description parameter to the write function so that a description
can be written to the metadata file. Additionally, set the author to the
current user name if available (otherwise "SystemML") and add a created
timestamp.
Closes #364.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e2492fb6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e2492fb6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e2492fb6
Branch: refs/heads/master
Commit: e2492fb61665fe55a0ccee34116c10bf84f38fbf
Parents: 578e595
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Sat Feb 4 10:19:11 2017 -0800
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Sat Feb 4 10:19:11 2017 -0800
----------------------------------------------------------------------
src/main/java/org/apache/sysml/lops/Data.java | 14 ++++++++-
.../org/apache/sysml/parser/DataExpression.java | 7 ++++-
.../apache/sysml/parser/OutputStatement.java | 3 +-
.../instructions/cp/VariableCPInstruction.java | 18 +++++++++---
.../instructions/spark/WriteSPInstruction.java | 15 +++++++++-
.../matrix/data/FileFormatProperties.java | 14 +++++++--
.../sysml/runtime/util/MapReduceTool.java | 31 ++++++++++++++++----
7 files changed, 86 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/lops/Data.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/Data.java b/src/main/java/org/apache/sysml/lops/Data.java
index 2cf125e..8e723c4 100644
--- a/src/main/java/org/apache/sysml/lops/Data.java
+++ b/src/main/java/org/apache/sysml/lops/Data.java
@@ -494,7 +494,19 @@ public class Data extends Lop
}
}
-
+
+ if (operation == OperationTypes.WRITE) {
+ sb.append(OPERAND_DELIMITOR);
+ Lop descriptionLop = getInputParams().get(DataExpression.DESCRIPTIONPARAM);
+ if (descriptionLop != null) {
+ boolean descLiteral = (descriptionLop instanceof Data && ((Data) descriptionLop).isLiteral());
+ sb.append(prepOperand(descriptionLop.getOutputParameters().getLabel(), DataType.SCALAR,
+ ValueType.STRING, descLiteral));
+ } else {
+ sb.append(prepOperand("", DataType.SCALAR, ValueType.STRING, true));
+ }
+ }
+
return sb.toString();
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/parser/DataExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DataExpression.java b/src/main/java/org/apache/sysml/parser/DataExpression.java
index cd9a862..9370bdd 100644
--- a/src/main/java/org/apache/sysml/parser/DataExpression.java
+++ b/src/main/java/org/apache/sysml/parser/DataExpression.java
@@ -78,6 +78,7 @@ public class DataExpression extends DataIdentifier
public static final String DESCRIPTIONPARAM = "description";
public static final String AUTHORPARAM = "author";
public static final String SCHEMAPARAM = "schema";
+ public static final String CREATEDPARAM = "created";
// Parameter names relevant to reading/writing delimited/csv files
public static final String DELIM_DELIMITER = "sep";
@@ -100,6 +101,7 @@ public class DataExpression extends DataIdentifier
public static final String[] READ_VALID_MTD_PARAM_NAMES =
{ IO_FILENAME, READROWPARAM, READCOLPARAM, READNUMNONZEROPARAM, FORMAT_TYPE,
ROWBLOCKCOUNTPARAM, COLUMNBLOCKCOUNTPARAM, DATATYPEPARAM, VALUETYPEPARAM, SCHEMAPARAM, DESCRIPTIONPARAM,
+ AUTHORPARAM, CREATEDPARAM,
// Parameters related to delimited/csv files.
DELIM_FILL_VALUE, DELIM_DELIMITER, DELIM_FILL, DELIM_HAS_HEADER_ROW, DELIM_NA_STRINGS
};
@@ -1836,7 +1838,10 @@ public class DataExpression extends DataIdentifier
{
// if the read method does not specify parameter value, then add MTD metadata file value to parameter list
if (getVarParam(key.toString()) == null){
- if ( !key.toString().equalsIgnoreCase(DESCRIPTIONPARAM) ) {
+ if (( !key.toString().equalsIgnoreCase(DESCRIPTIONPARAM) ) &&
+ ( !key.toString().equalsIgnoreCase(AUTHORPARAM) ) &&
+ ( !key.toString().equalsIgnoreCase(CREATEDPARAM) ) )
+ {
StringIdentifier strId = new StringIdentifier(val.toString(),
this.getFilename(), this.getBeginLine(), this.getBeginColumn(),
this.getEndLine(), this.getEndColumn());
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/parser/OutputStatement.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/OutputStatement.java b/src/main/java/org/apache/sysml/parser/OutputStatement.java
index d42432c..00859be 100644
--- a/src/main/java/org/apache/sysml/parser/OutputStatement.java
+++ b/src/main/java/org/apache/sysml/parser/OutputStatement.java
@@ -35,7 +35,8 @@ public class OutputStatement extends Statement
DataExpression.FORMAT_TYPE,
DataExpression.DELIM_DELIMITER,
DataExpression.DELIM_HAS_HEADER_ROW,
- DataExpression.DELIM_SPARSE};
+ DataExpression.DELIM_SPARSE,
+ DataExpression.DESCRIPTIONPARAM};
public DataIdentifier getIdentifier(){
return _id;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
index d5ce3f7..78fe330 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
@@ -101,6 +101,7 @@ public class VariableCPInstruction extends CPInstruction
private CPOperand input1;
private CPOperand input2;
private CPOperand input3;
+ private CPOperand input4;
private CPOperand output;
private MetaData metadata;
private UpdateType _updateType;
@@ -274,15 +275,15 @@ public class VariableCPInstruction extends CPInstruction
else if ( voc == VariableOperationCode.Write ) {
// All write instructions have 3 parameters, except in case of delimited/csv file.
// Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse)
- if ( parts.length != 4 && parts.length != 7 )
- throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
+ if ( parts.length != 5 && parts.length != 8 )
+ throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
}
else {
_arity = getArity(voc);
InstructionUtils.checkNumFields ( parts, _arity ); // no output
}
- CPOperand in1=null, in2=null, in3=null, out=null;
+ CPOperand in1=null, in2=null, in3=null, in4=null, out=null;
switch (voc) {
@@ -413,6 +414,13 @@ public class VariableCPInstruction extends CPInstruction
boolean sparse = Boolean.parseBoolean(parts[6]);
FileFormatProperties formatProperties = new CSVFileFormatProperties(hasHeader, delim, sparse);
inst.setFormatProperties(formatProperties);
+ in4 = new CPOperand(parts[7]); // description
+ inst.input4 = in4;
+ } else {
+ FileFormatProperties ffp = new FileFormatProperties();
+ inst.setFormatProperties(ffp);
+ in4 = new CPOperand(parts[4]); // description
+ inst.input4 = in4;
}
return inst;
@@ -745,6 +753,8 @@ public class VariableCPInstruction extends CPInstruction
{
//get filename (literal or variable expression)
String fname = ec.getScalarInput(input2.getName(), ValueType.STRING, input2.isLiteral()).getStringValue();
+ String desc = ec.getScalarInput(input4.getName(), ValueType.STRING, input4.isLiteral()).getStringValue();
+ _formatProperties.setDescription(desc);
if( input1.getDataType() == DataType.SCALAR ) {
writeScalarToHDFS(ec, fname);
@@ -758,7 +768,7 @@ public class VariableCPInstruction extends CPInstruction
else {
// Default behavior
MatrixObject mo = ec.getMatrixObject(input1.getName());
- mo.exportData(fname, outFmt);
+ mo.exportData(fname, outFmt, _formatProperties);
}
}
else if( input1.getDataType() == DataType.FRAME ) {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
index 431ff24..912dbe3 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
@@ -55,6 +55,7 @@ public class WriteSPInstruction extends SPInstruction
private CPOperand input1 = null;
private CPOperand input2 = null;
private CPOperand input3 = null;
+ private CPOperand input4 = null;
private FileFormatProperties formatProperties;
//scalars might occur for transform
@@ -82,7 +83,7 @@ public class WriteSPInstruction extends SPInstruction
// All write instructions have 3 parameters, except in case of delimited/csv file.
// Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse)
- if ( parts.length != 4 && parts.length != 8 ) {
+ if ( parts.length != 5 && parts.length != 9 ) {
throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
}
@@ -103,6 +104,15 @@ public class WriteSPInstruction extends SPInstruction
boolean isInputMB = Boolean.parseBoolean(parts[7]);
inst.setInputMatrixBlock(isInputMB);
+
+ CPOperand in4 = new CPOperand(parts[8]);
+ inst.input4 = in4;
+ } else {
+ FileFormatProperties ffp = new FileFormatProperties();
+
+ CPOperand in4 = new CPOperand(parts[4]);
+ inst.input4 = in4;
+ inst.setFormatProperties(ffp);
}
return inst;
}
@@ -132,6 +142,9 @@ public class WriteSPInstruction extends SPInstruction
//get filename (literal or variable expression)
String fname = ec.getScalarInput(input2.getName(), ValueType.STRING, input2.isLiteral()).getStringValue();
+ String desc = ec.getScalarInput(input4.getName(), ValueType.STRING, input4.isLiteral()).getStringValue();
+ formatProperties.setDescription(desc);
+
ValueType[] schema = (input1.getDataType()==DataType.FRAME) ?
sec.getFrameObject(input1.getName()).getSchema() : null;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
index b782d7b..2f405da 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
@@ -22,16 +22,17 @@ package org.apache.sysml.runtime.matrix.data;
public class FileFormatProperties
{
+ private String description;
public enum FileFormat { CSV, NATIVE };
FileFormat fmt;
- FileFormatProperties() {
+ public FileFormatProperties() {
fmt = FileFormat.NATIVE;
}
- FileFormatProperties(FileFormat fmt) {
+ public FileFormatProperties(FileFormat fmt) {
this.fmt = fmt;
}
@@ -42,4 +43,13 @@ public class FileFormatProperties
public FileFormat getFileFormat() {
return fmt;
}
+
+ public String getDescription() {
+ return description;
+ }
+
+ public void setDescription(String description) {
+ this.description = description;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
index 6f083f7..ff121b6 100644
--- a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
+++ b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
@@ -25,7 +25,11 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -386,7 +390,7 @@ public class MapReduceTool
BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
try {
- String mtd = metaDataToString(mtdfile, vt, schema, dt, mc, outinfo, formatProperties);
+ String mtd = metaDataToString(vt, schema, dt, mc, outinfo, formatProperties);
br.write(mtd);
br.close();
} catch (Exception e) {
@@ -402,8 +406,7 @@ public class MapReduceTool
BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
try {
- String mtd = metaDataToString(mtdfile, vt, null,
- DataType.SCALAR, null, OutputInfo.TextCellOutputInfo, null);
+ String mtd = metaDataToString(vt, null, DataType.SCALAR, null, OutputInfo.TextCellOutputInfo, null);
br.write(mtd);
br.close();
}
@@ -412,7 +415,7 @@ public class MapReduceTool
}
}
- public static String metaDataToString(String mtdfile, ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc,
+ public static String metaDataToString(ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc,
OutputInfo outinfo, FileFormatProperties formatProperties) throws JSONException, DMLRuntimeException
{
OrderedJSONObject mtd = new OrderedJSONObject(); // maintain order in output file
@@ -456,8 +459,24 @@ public class MapReduceTool
mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, csvProperties.hasHeader());
mtd.put(DataExpression.DELIM_DELIMITER, csvProperties.getDelim());
}
- mtd.put(DataExpression.DESCRIPTIONPARAM,
- new OrderedJSONObject().put(DataExpression.AUTHORPARAM, "SystemML"));
+
+ if (formatProperties != null) {
+ String description = formatProperties.getDescription();
+ if (StringUtils.isNotEmpty(description)) {
+ String jsonDescription = StringEscapeUtils.escapeJson(description);
+ mtd.put(DataExpression.DESCRIPTIONPARAM, jsonDescription);
+ }
+ }
+
+ String userName = System.getProperty("user.name");
+ if (StringUtils.isNotEmpty(userName)) {
+ mtd.put(DataExpression.AUTHORPARAM, userName);
+ } else {
+ mtd.put(DataExpression.AUTHORPARAM, "SystemML");
+ }
+
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z");
+ mtd.put(DataExpression.CREATEDPARAM, sdf.format(new Date()));
return mtd.toString(4); // indent with 4 spaces
}