You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2016/09/04 17:41:52 UTC
[2/2] incubator-systemml git commit: [SYSTEMML-896] Additional
MLContext Frame support
[SYSTEMML-896] Additional MLContext Frame support
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/d39865e9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/d39865e9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/d39865e9
Branch: refs/heads/master
Commit: d39865e9e3468cee0fab95cb9d8efe1ba4fe992f
Parents: b7657db
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Sat Sep 3 23:10:40 2016 -0700
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Sat Sep 3 23:10:40 2016 -0700
----------------------------------------------------------------------
.../sysml/api/mlcontext/BinaryBlockFrame.java | 179 +++++
.../sysml/api/mlcontext/BinaryBlockMatrix.java | 4 +-
.../org/apache/sysml/api/mlcontext/Frame.java | 138 ++++
.../apache/sysml/api/mlcontext/FrameFormat.java | 42 ++
.../sysml/api/mlcontext/FrameMetadata.java | 695 +++++++++++++++++++
.../apache/sysml/api/mlcontext/FrameSchema.java | 128 ++++
.../api/mlcontext/MLContextConversionUtil.java | 250 ++++---
.../sysml/api/mlcontext/MLContextUtil.java | 306 +++++---
.../apache/sysml/api/mlcontext/MLResults.java | 247 ++++---
.../sysml/api/mlcontext/MatrixMetadata.java | 2 +-
.../apache/sysml/api/mlcontext/Metadata.java | 30 +
.../org/apache/sysml/api/mlcontext/Script.java | 100 +--
.../sysml/api/mlcontext/ScriptExecutor.java | 10 +-
.../mlcontext/MLContextFrameTest.java | 557 +++++++++++++++
.../integration/mlcontext/MLContextTest.java | 359 +---------
.../integration/mlcontext/ZPackageSuite.java | 3 +-
16 files changed, 2319 insertions(+), 731 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
new file mode 100644
index 0000000..88b1b38
--- /dev/null
+++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.api.mlcontext;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.sql.DataFrame;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.FrameBlock;
+
+/**
+ * BinaryBlockFrame stores data as a SystemML binary-block frame representation.
+ *
+ */
+public class BinaryBlockFrame {
+
+ JavaPairRDD<Long, FrameBlock> binaryBlocks;
+ FrameMetadata frameMetadata;
+
+ /**
+ * Convert a Spark DataFrame to a SystemML binary-block frame representation.
+ *
+ * @param dataFrame
+ * the Spark DataFrame
+ * @param frameMetadata
+ * frame metadata, such as number of rows and columns
+ */
+ public BinaryBlockFrame(DataFrame dataFrame, FrameMetadata frameMetadata) {
+ this.frameMetadata = frameMetadata;
+ binaryBlocks = MLContextConversionUtil.dataFrameToFrameBinaryBlocks(dataFrame, frameMetadata);
+ }
+
+ /**
+ * Convert a Spark DataFrame to a SystemML binary-block frame representation,
+ * specifying the number of rows and columns.
+ *
+ * @param dataFrame
+ * the Spark DataFrame
+ * @param numRows
+ * the number of rows
+ * @param numCols
+ * the number of columns
+ */
+ public BinaryBlockFrame(DataFrame dataFrame, long numRows, long numCols) {
+ this(dataFrame, new FrameMetadata(numRows, numCols, MLContextUtil.defaultBlockSize(),
+ MLContextUtil.defaultBlockSize()));
+ }
+
+ /**
+ * Convert a Spark DataFrame to a SystemML binary-block frame representation.
+ *
+ * @param dataFrame
+ * the Spark DataFrame
+ */
+ public BinaryBlockFrame(DataFrame dataFrame) {
+ this(dataFrame, new FrameMetadata());
+ }
+
+ /**
+ * Create a BinaryBlockFrame, specifying the SystemML binary-block frame and
+ * its metadata.
+ *
+ * @param binaryBlocks
+ * the {@code JavaPairRDD<Long, FrameBlock>} frame
+ * @param matrixCharacteristics
+ * the frame metadata as {@code MatrixCharacteristics}
+ */
+ public BinaryBlockFrame(JavaPairRDD<Long, FrameBlock> binaryBlocks, MatrixCharacteristics matrixCharacteristics) {
+ this.binaryBlocks = binaryBlocks;
+ this.frameMetadata = new FrameMetadata(matrixCharacteristics);
+ }
+
+ /**
+ * Create a BinaryBlockFrame, specifying the SystemML binary-block frame and
+ * its metadata.
+ *
+ * @param binaryBlocks
+ * the {@code JavaPairRDD<Long, FrameBlock>} frame
+ * @param frameMetadata
+ * the frame metadata as {@code FrameMetadata}
+ */
+ public BinaryBlockFrame(JavaPairRDD<Long, FrameBlock> binaryBlocks, FrameMetadata frameMetadata) {
+ this.binaryBlocks = binaryBlocks;
+ this.frameMetadata = frameMetadata;
+ }
+
+ /**
+ * Obtain a SystemML binary-block frame as a
+ * {@code JavaPairRDD<Long, FrameBlock>}
+ *
+ * @return the SystemML binary-block frame
+ */
+ public JavaPairRDD<Long, FrameBlock> getBinaryBlocks() {
+ return binaryBlocks;
+ }
+
+ /**
+ * Obtain a SystemML binary-block frame as a {@code FrameBlock}, using the schema and dimensions of the frame metadata
+ *
+ * @return the SystemML binary-block frame as a {@code FrameBlock} (row and column counts are cast to {@code int})
+ */
+ public FrameBlock getFrameBlock() {
+ try {
+ MatrixCharacteristics mc = getMatrixCharacteristics();
+ FrameSchema frameSchema = frameMetadata.getFrameSchema();
+ FrameBlock mb = SparkExecutionContext.toFrameBlock(binaryBlocks, frameSchema.getSchema(),
+ (int) mc.getRows(), (int) mc.getCols());
+ return mb;
+ } catch (DMLRuntimeException e) {
+ throw new MLContextException("Exception while getting FrameBlock from binary-block frame", e);
+ }
+ }
+
+ /**
+ * Obtain the SystemML binary-block frame characteristics
+ *
+ * @return the frame metadata as {@code MatrixCharacteristics}
+ */
+ public MatrixCharacteristics getMatrixCharacteristics() {
+ return frameMetadata.asMatrixCharacteristics();
+ }
+
+ /**
+ * Obtain the SystemML binary-block frame metadata
+ *
+ * @return the frame metadata as {@code FrameMetadata}
+ */
+ public FrameMetadata getFrameMetadata() {
+ return frameMetadata;
+ }
+
+ /**
+ * Set the SystemML binary-block frame metadata
+ *
+ * @param frameMetadata
+ * the frame metadata
+ */
+ public void setFrameMetadata(FrameMetadata frameMetadata) {
+ this.frameMetadata = frameMetadata;
+ }
+
+ /**
+ * Set the SystemML binary-block frame as a
+ * {@code JavaPairRDD<Long, FrameBlock>}
+ *
+ * @param binaryBlocks
+ * the SystemML binary-block frame
+ */
+ public void setBinaryBlocks(JavaPairRDD<Long, FrameBlock> binaryBlocks) {
+ this.binaryBlocks = binaryBlocks;
+ }
+
+ @Override
+ public String toString() {
+ if (frameMetadata != null) {
+ return frameMetadata.toString();
+ } else {
+ return super.toString();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
index b13669d..ffa8a11 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
@@ -28,7 +28,7 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
/**
- * BinaryBlockMatrix stores data as a SystemML binary-block representation.
+ * BinaryBlockMatrix stores data as a SystemML binary-block matrix representation.
*
*/
public class BinaryBlockMatrix {
@@ -46,7 +46,7 @@ public class BinaryBlockMatrix {
*/
public BinaryBlockMatrix(DataFrame dataFrame, MatrixMetadata matrixMetadata) {
this.matrixMetadata = matrixMetadata;
- binaryBlocks = MLContextConversionUtil.dataFrameToBinaryBlocks(dataFrame, matrixMetadata);
+ binaryBlocks = MLContextConversionUtil.dataFrameToMatrixBinaryBlocks(dataFrame, matrixMetadata);
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/Frame.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/Frame.java b/src/main/java/org/apache/sysml/api/mlcontext/Frame.java
new file mode 100644
index 0000000..ee447df
--- /dev/null
+++ b/src/main/java/org/apache/sysml/api/mlcontext/Frame.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.api.mlcontext;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.rdd.RDD;
+import org.apache.spark.sql.DataFrame;
+import org.apache.sysml.runtime.controlprogram.caching.FrameObject;
+import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+
+/**
+ * Frame encapsulates a SystemML frame.
+ *
+ */
+public class Frame {
+
+ private FrameObject frameObject;
+ private SparkExecutionContext sparkExecutionContext;
+
+ public Frame(FrameObject frameObject, SparkExecutionContext sparkExecutionContext) {
+ this.frameObject = frameObject;
+ this.sparkExecutionContext = sparkExecutionContext;
+ }
+
+ /**
+ * Obtain the frame as a SystemML FrameObject.
+ *
+ * @return the frame as a SystemML FrameObject
+ */
+ public FrameObject asFrameObject() {
+ return frameObject;
+ }
+
+ /**
+ * Obtain the frame as a two-dimensional String array
+ *
+ * @return the frame as a two-dimensional String array
+ */
+ public String[][] as2DStringArray() {
+ String[][] strArray = MLContextConversionUtil.frameObjectTo2DStringArray(frameObject);
+ return strArray;
+ }
+
+ /**
+ * Obtain the frame as a {@code JavaRDD<String>} in IJV format
+ *
+ * @return the frame as a {@code JavaRDD<String>} in IJV format
+ */
+ public JavaRDD<String> asJavaRDDStringIJV() {
+ JavaRDD<String> javaRDDStringIJV = MLContextConversionUtil.frameObjectToJavaRDDStringIJV(frameObject);
+ return javaRDDStringIJV;
+ }
+
+ /**
+ * Obtain the frame as a {@code JavaRDD<String>} in CSV format
+ *
+ * @return the frame as a {@code JavaRDD<String>} in CSV format
+ */
+ public JavaRDD<String> asJavaRDDStringCSV() {
+ JavaRDD<String> javaRDDStringCSV = MLContextConversionUtil.frameObjectToJavaRDDStringCSV(frameObject, ",");
+ return javaRDDStringCSV;
+ }
+
+ /**
+ * Obtain the frame as a {@code RDD<String>} in CSV format
+ *
+ * @return the frame as a {@code RDD<String>} in CSV format
+ */
+ public RDD<String> asRDDStringCSV() {
+ RDD<String> rddStringCSV = MLContextConversionUtil.frameObjectToRDDStringCSV(frameObject, ",");
+ return rddStringCSV;
+ }
+
+ /**
+ * Obtain the frame as a {@code RDD<String>} in IJV format
+ *
+ * @return the frame as a {@code RDD<String>} in IJV format
+ */
+ public RDD<String> asRDDStringIJV() {
+ RDD<String> rddStringIJV = MLContextConversionUtil.frameObjectToRDDStringIJV(frameObject);
+ return rddStringIJV;
+ }
+
+ /**
+ * Obtain the frame as a {@code DataFrame}
+ *
+ * @return the frame as a {@code DataFrame}
+ */
+ public DataFrame asDataFrame() {
+ DataFrame df = MLContextConversionUtil.frameObjectToDataFrame(frameObject, sparkExecutionContext);
+ return df;
+ }
+
+ /**
+ * Obtain the frame as a {@code BinaryBlockFrame}
+ *
+ * @return the frame as a {@code BinaryBlockFrame}
+ */
+ public BinaryBlockFrame asBinaryBlockFrame() {
+ BinaryBlockFrame binaryBlockFrame = MLContextConversionUtil.frameObjectToBinaryBlockFrame(frameObject,
+ sparkExecutionContext);
+ return binaryBlockFrame;
+ }
+
+ /**
+ * Obtain the frame metadata
+ *
+ * @return the frame metadata
+ */
+ public FrameMetadata getFrameMetadata() {
+ MatrixCharacteristics matrixCharacteristics = frameObject.getMatrixCharacteristics();
+ FrameMetadata frameMetadata = new FrameMetadata(matrixCharacteristics);
+ return frameMetadata;
+ }
+
+ @Override
+ public String toString() {
+ return frameObject.toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/FrameFormat.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/FrameFormat.java b/src/main/java/org/apache/sysml/api/mlcontext/FrameFormat.java
new file mode 100644
index 0000000..bce8e5d
--- /dev/null
+++ b/src/main/java/org/apache/sysml/api/mlcontext/FrameFormat.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.api.mlcontext;
+
+/**
+ * FrameFormat represents the different frame formats supported by the MLContext
+ * API.
+ *
+ */
+public enum FrameFormat {
+ /**
+ * Comma-separated value format (dense).
+ */
+ CSV,
+
+ /**
+ * (I J V) format (sparse). I and J represent frame coordinates and V
+ * represents the value. The I J and V values are space-separated.
+ */
+ IJV;
+
+ public boolean hasIDColumn() {
+ return false;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java b/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
new file mode 100644
index 0000000..5aabd80
--- /dev/null
+++ b/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
@@ -0,0 +1,695 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.api.mlcontext;
+
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+
+/**
+ * Frame metadata, such as the frame format, the frame schema, the number of
+ * rows and columns, the number of non-zero values, and the number of rows and
+ * columns per block in the frame.
+ *
+ */
+public class FrameMetadata extends Metadata {
+
+ private Long numRows = null;
+ private Long numColumns = null;
+ private Long numNonZeros = null;
+ private Integer numRowsPerBlock = null;
+ private Integer numColumnsPerBlock = null;
+ private FrameFormat frameFormat;
+ private FrameSchema frameSchema;
+
+ public FrameMetadata() {
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on a string
+ * representation of a frame schema.
+ *
+ * @param schema
+ * String representation of the frame schema.
+ */
+ public FrameMetadata(String schema) {
+ this.frameSchema = new FrameSchema(schema);
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format.
+ *
+ * @param frameFormat
+ * The frame format.
+ */
+ public FrameMetadata(FrameFormat frameFormat) {
+ this.frameFormat = frameFormat;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame schema.
+ *
+ * @param frameSchema
+ * The frame schema.
+ */
+ public FrameMetadata(FrameSchema frameSchema) {
+ this.frameSchema = frameSchema;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format and
+ * frame schema.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param frameSchema
+ * The frame schema.
+ */
+ public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema) {
+ this.frameFormat = frameFormat;
+ this.frameSchema = frameSchema;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, frame
+ * schema, the number of rows, and the number of columns in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param frameSchema
+ * The frame schema.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema, Long numRows, Long numColumns) {
+ this.frameFormat = frameFormat;
+ this.frameSchema = frameSchema;
+ this.numRows = numRows;
+ this.numColumns = numColumns;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, frame
+ * schema, the number of rows, and the number of columns in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param frameSchema
+ * The frame schema.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema, int numRows, int numColumns) {
+ this.frameFormat = frameFormat;
+ this.frameSchema = frameSchema;
+ this.numRows = (long) numRows;
+ this.numColumns = (long) numColumns;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, frame
+ * schema, the number of rows, the number of columns, the number of non-zero
+ * values, the number of rows per block, and the number of columns per block
+ * in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param frameSchema
+ * The frame schema.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ * @param numRowsPerBlock
+ * The number of rows per block in the frame.
+ * @param numColumnsPerBlock
+ * The number of columns per block in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema, Long numRows, Long numColumns,
+ Long numNonZeros, Integer numRowsPerBlock, Integer numColumnsPerBlock) {
+ this.frameFormat = frameFormat;
+ this.frameSchema = frameSchema;
+ this.numRows = numRows;
+ this.numColumns = numColumns;
+ this.numNonZeros = numNonZeros;
+ this.numRowsPerBlock = numRowsPerBlock;
+ this.numColumnsPerBlock = numColumnsPerBlock;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, frame
+ * schema, the number of rows, the number of columns, the number of non-zero
+ * values, the number of rows per block, and the number of columns per block
+ * in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param frameSchema
+ * The frame schema.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ * @param numRowsPerBlock
+ * The number of rows per block in the frame.
+ * @param numColumnsPerBlock
+ * The number of columns per block in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema, int numRows, int numColumns, int numNonZeros,
+ int numRowsPerBlock, int numColumnsPerBlock) {
+ this.frameFormat = frameFormat;
+ this.frameSchema = frameSchema;
+ this.numRows = (long) numRows;
+ this.numColumns = (long) numColumns;
+ this.numNonZeros = (long) numNonZeros;
+ this.numRowsPerBlock = numRowsPerBlock;
+ this.numColumnsPerBlock = numColumnsPerBlock;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, the
+ * number of rows, and the number of columns in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, Long numRows, Long numColumns) {
+ this.frameFormat = frameFormat;
+ this.numRows = numRows;
+ this.numColumns = numColumns;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, the
+ * number of rows, and the number of columns in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, int numRows, int numColumns) {
+ this.frameFormat = frameFormat;
+ this.numRows = (long) numRows;
+ this.numColumns = (long) numColumns;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, the
+ * number of rows, the number of columns, and the number of non-zero values
+ * in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, Long numRows, Long numColumns, Long numNonZeros) {
+ this.frameFormat = frameFormat;
+ this.numRows = numRows;
+ this.numColumns = numColumns;
+ this.numNonZeros = numNonZeros;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, the
+ * number of rows, the number of columns, and the number of non-zero values
+ * in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, int numRows, int numColumns, int numNonZeros) {
+ this.frameFormat = frameFormat;
+ this.numRows = (long) numRows;
+ this.numColumns = (long) numColumns;
+ this.numNonZeros = (long) numNonZeros;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, the
+ * number of rows, the number of columns, the number of non-zero values, the
+ * number of rows per block, and the number of columns per block in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ * @param numRowsPerBlock
+ * The number of rows per block in the frame.
+ * @param numColumnsPerBlock
+ * The number of columns per block in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, Long numRows, Long numColumns, Long numNonZeros,
+ Integer numRowsPerBlock, Integer numColumnsPerBlock) {
+ this.frameFormat = frameFormat;
+ this.numRows = numRows;
+ this.numColumns = numColumns;
+ this.numNonZeros = numNonZeros;
+ this.numRowsPerBlock = numRowsPerBlock;
+ this.numColumnsPerBlock = numColumnsPerBlock;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on frame format, the
+ * number of rows, the number of columns, the number of non-zero values, the
+ * number of rows per block, and the number of columns per block in a frame.
+ *
+ * @param frameFormat
+ * The frame format.
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ * @param numRowsPerBlock
+ * The number of rows per block in the frame.
+ * @param numColumnsPerBlock
+ * The number of columns per block in the frame.
+ */
+ public FrameMetadata(FrameFormat frameFormat, int numRows, int numColumns, int numNonZeros, int numRowsPerBlock,
+ int numColumnsPerBlock) {
+ this.frameFormat = frameFormat;
+ this.numRows = (long) numRows;
+ this.numColumns = (long) numColumns;
+ this.numNonZeros = (long) numNonZeros;
+ this.numRowsPerBlock = numRowsPerBlock;
+ this.numColumnsPerBlock = numColumnsPerBlock;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on the number of rows
+ * and the number of columns in a frame.
+ *
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ */
+ public FrameMetadata(Long numRows, Long numColumns) {
+ this.numRows = numRows;
+ this.numColumns = numColumns;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on the number of rows
+ * and the number of columns in a frame.
+ *
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ */
+ public FrameMetadata(int numRows, int numColumns) {
+ this.numRows = (long) numRows;
+ this.numColumns = (long) numColumns;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on the number of rows,
+ * the number of columns, and the number of non-zero values in a frame.
+ *
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ */
+ public FrameMetadata(Long numRows, Long numColumns, Long numNonZeros) {
+ this.numRows = numRows;
+ this.numColumns = numColumns;
+ this.numNonZeros = numNonZeros;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on the number of rows,
+ * the number of columns, and the number of non-zero values in a frame.
+ *
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ */
+ public FrameMetadata(int numRows, int numColumns, int numNonZeros) {
+ this.numRows = (long) numRows;
+ this.numColumns = (long) numColumns;
+ this.numNonZeros = (long) numNonZeros;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on the number of rows,
+ * the number of columns, the number of rows per block, and the number of
+ * columns per block in a frame.
+ *
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numRowsPerBlock
+ * The number of rows per block in the frame.
+ * @param numColumnsPerBlock
+ * The number of columns per block in the frame.
+ */
+ public FrameMetadata(Long numRows, Long numColumns, Integer numRowsPerBlock, Integer numColumnsPerBlock) {
+ this.numRows = numRows;
+ this.numColumns = numColumns;
+ this.numRowsPerBlock = numRowsPerBlock;
+ this.numColumnsPerBlock = numColumnsPerBlock;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on the number of rows,
+ * the number of columns, the number of rows per block, and the number of
+ * columns per block in a frame.
+ *
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numRowsPerBlock
+ * The number of rows per block in the frame.
+ * @param numColumnsPerBlock
+ * The number of columns per block in the frame.
+ */
+ public FrameMetadata(int numRows, int numColumns, int numRowsPerBlock, int numColumnsPerBlock) {
+ this.numRows = (long) numRows;
+ this.numColumns = (long) numColumns;
+ this.numRowsPerBlock = numRowsPerBlock;
+ this.numColumnsPerBlock = numColumnsPerBlock;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on the number of rows,
+ * the number of columns, the number of non-zero values, the number of rows
+ * per block, and the number of columns per block in a frame.
+ *
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ * @param numRowsPerBlock
+ * The number of rows per block in the frame.
+ * @param numColumnsPerBlock
+ * The number of columns per block in the frame.
+ */
+ public FrameMetadata(Long numRows, Long numColumns, Long numNonZeros, Integer numRowsPerBlock,
+ Integer numColumnsPerBlock) {
+ this.numRows = numRows;
+ this.numColumns = numColumns;
+ this.numNonZeros = numNonZeros;
+ this.numRowsPerBlock = numRowsPerBlock;
+ this.numColumnsPerBlock = numColumnsPerBlock;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on the number of rows,
+ * the number of columns, the number of non-zero values, the number of rows
+ * per block, and the number of columns per block in a frame.
+ *
+ * @param numRows
+ * The number of rows in the frame.
+ * @param numColumns
+ * The number of columns in the frame.
+ * @param numNonZeros
+ * The number of non-zero values in the frame.
+ * @param numRowsPerBlock
+ * The number of rows per block in the frame.
+ * @param numColumnsPerBlock
+ * The number of columns per block in the frame.
+ */
+ public FrameMetadata(int numRows, int numColumns, int numNonZeros, int numRowsPerBlock, int numColumnsPerBlock) {
+ this.numRows = (long) numRows;
+ this.numColumns = (long) numColumns;
+ this.numNonZeros = (long) numNonZeros;
+ this.numRowsPerBlock = numRowsPerBlock;
+ this.numColumnsPerBlock = numColumnsPerBlock;
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on a
+ * MatrixCharacteristics object.
+ *
+ * @param matrixCharacteristics
+ * the frame metadata as a MatrixCharacteristics object
+ */
+ public FrameMetadata(MatrixCharacteristics matrixCharacteristics) {
+ this.numRows = matrixCharacteristics.getRows();
+ this.numColumns = matrixCharacteristics.getCols();
+ this.numNonZeros = matrixCharacteristics.getNonZeros();
+ this.numRowsPerBlock = matrixCharacteristics.getRowsPerBlock();
+ this.numColumnsPerBlock = matrixCharacteristics.getColsPerBlock();
+ }
+
+ /**
+ * Constructor to create a FrameMetadata object based on the frame schema
+ * and a MatrixCharacteristics object.
+ *
+ * @param frameSchema
+ * The frame schema.
+ * @param matrixCharacteristics
+ * the frame metadata as a MatrixCharacteristics object
+ */
+ public FrameMetadata(FrameSchema frameSchema, MatrixCharacteristics matrixCharacteristics) {
+ this.frameSchema = frameSchema;
+ this.numRows = matrixCharacteristics.getRows();
+ this.numColumns = matrixCharacteristics.getCols();
+ this.numNonZeros = matrixCharacteristics.getNonZeros();
+ this.numRowsPerBlock = matrixCharacteristics.getRowsPerBlock();
+ this.numColumnsPerBlock = matrixCharacteristics.getColsPerBlock();
+ }
+
+ /**
+  * Overwrite the row count, column count, non-zero count, and both block
+  * sizes of this FrameMetadata with the values held by a
+  * MatrixCharacteristics object. The frame format and frame schema are not
+  * touched.
+  *
+  * @param matrixCharacteristics
+  *            the frame metadata as a MatrixCharacteristics object; it is
+  *            dereferenced without a null check
+  */
+ public void setMatrixCharacteristics(MatrixCharacteristics matrixCharacteristics) {
+     final MatrixCharacteristics mc = matrixCharacteristics;
+     // Block sizes
+     this.numRowsPerBlock = mc.getRowsPerBlock();
+     this.numColumnsPerBlock = mc.getColsPerBlock();
+     // Overall dimensions and non-zero count
+     this.numRows = mc.getRows();
+     this.numColumns = mc.getCols();
+     this.numNonZeros = mc.getNonZeros();
+ }
+
+ /**
+ * Obtain the number of rows.
+ *
+ * @return the number of rows; may be {@code null}, in which case
+ * {@code asMatrixCharacteristics()} substitutes -1
+ */
+ public Long getNumRows() {
+ return numRows;
+ }
+
+ /**
+ * Set the number of rows. The value is stored as given.
+ *
+ * @param numRows
+ * the number of rows
+ */
+ public void setNumRows(Long numRows) {
+ this.numRows = numRows;
+ }
+
+ /**
+ * Obtain the number of columns.
+ *
+ * @return the number of columns; may be {@code null}, in which case
+ * {@code asMatrixCharacteristics()} substitutes -1
+ */
+ public Long getNumColumns() {
+ return numColumns;
+ }
+
+ /**
+ * Set the number of columns. The value is stored as given.
+ *
+ * @param numColumns
+ * the number of columns
+ */
+ public void setNumColumns(Long numColumns) {
+ this.numColumns = numColumns;
+ }
+
+ /**
+ * Obtain the number of non-zero values.
+ *
+ * @return the number of non-zero values; may be {@code null}, in which
+ * case {@code asMatrixCharacteristics()} substitutes -1
+ */
+ public Long getNumNonZeros() {
+ return numNonZeros;
+ }
+
+ /**
+ * Set the number of non-zero values. The value is stored as given.
+ *
+ * @param numNonZeros
+ * the number of non-zero values
+ */
+ public void setNumNonZeros(Long numNonZeros) {
+ this.numNonZeros = numNonZeros;
+ }
+
+ /**
+ * Obtain the number of rows per block.
+ *
+ * @return the number of rows per block; may be {@code null}, in which case
+ * {@code asMatrixCharacteristics()} substitutes
+ * {@code MLContextUtil.defaultBlockSize()}
+ */
+ public Integer getNumRowsPerBlock() {
+ return numRowsPerBlock;
+ }
+
+ /**
+ * Set the number of rows per block. The value is stored as given.
+ *
+ * @param numRowsPerBlock
+ * the number of rows per block
+ */
+ public void setNumRowsPerBlock(Integer numRowsPerBlock) {
+ this.numRowsPerBlock = numRowsPerBlock;
+ }
+
+ /**
+ * Obtain the number of columns per block.
+ *
+ * @return the number of columns per block; may be {@code null}, in which
+ * case {@code asMatrixCharacteristics()} substitutes
+ * {@code MLContextUtil.defaultBlockSize()}
+ */
+ public Integer getNumColumnsPerBlock() {
+ return numColumnsPerBlock;
+ }
+
+ /**
+ * Set the number of columns per block. The value is stored as given.
+ *
+ * @param numColumnsPerBlock
+ * the number of columns per block
+ */
+ public void setNumColumnsPerBlock(Integer numColumnsPerBlock) {
+ this.numColumnsPerBlock = numColumnsPerBlock;
+ }
+
+ /**
+  * Build a MatrixCharacteristics object from this frame metadata.
+  * <p>
+  * When none of the five numeric fields is set, {@code null} is returned.
+  * Otherwise each unset count (rows, columns, non-zeros) is passed as -1 and
+  * each unset block size is taken from
+  * {@code MLContextUtil.defaultBlockSize()}.
+  *
+  * @return the frame metadata as a MatrixCharacteristics object, or
+  *         {@code null} if all field values are null
+  */
+ public MatrixCharacteristics asMatrixCharacteristics() {
+     boolean anyFieldSet = (numRows != null) || (numColumns != null) || (numRowsPerBlock != null)
+             || (numColumnsPerBlock != null) || (numNonZeros != null);
+     if (!anyFieldSet) {
+         return null;
+     }
+
+     long rows = -1;
+     if (numRows != null) {
+         rows = numRows;
+     }
+     long columns = -1;
+     if (numColumns != null) {
+         columns = numColumns;
+     }
+     int rowsPerBlock;
+     if (numRowsPerBlock != null) {
+         rowsPerBlock = numRowsPerBlock;
+     } else {
+         rowsPerBlock = MLContextUtil.defaultBlockSize();
+     }
+     int columnsPerBlock;
+     if (numColumnsPerBlock != null) {
+         columnsPerBlock = numColumnsPerBlock;
+     } else {
+         columnsPerBlock = MLContextUtil.defaultBlockSize();
+     }
+     long nonZeros = -1;
+     if (numNonZeros != null) {
+         nonZeros = numNonZeros;
+     }
+     return new MatrixCharacteristics(rows, columns, rowsPerBlock, columnsPerBlock, nonZeros);
+ }
+
+ @Override
+ public String toString() {
+ return "rows: " + fieldDisplay(numRows) + ", columns: " + fieldDisplay(numColumns) + ", non-zeros: "
+ + fieldDisplay(numNonZeros) + ", rows per block: " + fieldDisplay(numRowsPerBlock)
+ + ", columns per block: " + fieldDisplay(numColumnsPerBlock);
+ }
+
+ /**
+  * Text shown for a field in {@code toString()}: "None" for {@code null},
+  * otherwise the value's own string form.
+  */
+ private String fieldDisplay(Object field) {
+     return (field == null) ? "None" : field.toString();
+ }
+
+ /**
+ * Obtain the frame format.
+ *
+ * @return the frame format; may be {@code null} (callers such as
+ * {@code MLContextConversionUtil.isDataFrameWithIDColumn} check for
+ * this)
+ */
+ public FrameFormat getFrameFormat() {
+ return frameFormat;
+ }
+
+ /**
+ * Set the frame format. The value is stored as given.
+ *
+ * @param frameFormat
+ * the frame format
+ */
+ public void setFrameFormat(FrameFormat frameFormat) {
+ this.frameFormat = frameFormat;
+ }
+
+ /**
+ * Obtain the frame schema.
+ *
+ * @return the frame schema as last assigned by a constructor or
+ * {@code setFrameSchema}
+ */
+ public FrameSchema getFrameSchema() {
+ return frameSchema;
+ }
+
+ /**
+ * Set the frame schema. The value is stored as given.
+ *
+ * @param frameSchema
+ * the frame schema
+ */
+ public void setFrameSchema(FrameSchema frameSchema) {
+ this.frameSchema = frameSchema;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/FrameSchema.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/FrameSchema.java b/src/main/java/org/apache/sysml/api/mlcontext/FrameSchema.java
new file mode 100644
index 0000000..040d77b
--- /dev/null
+++ b/src/main/java/org/apache/sysml/api/mlcontext/FrameSchema.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.api.mlcontext;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.sysml.parser.Expression.ValueType;
+
+/**
+ * The frame schema, stored as a list of {@code ValueType} values.
+ * <p>
+ * A schema can be supplied either as a list or as a comma-separated string of
+ * {@code ValueType} names. Names in the string form are matched
+ * case-insensitively and whitespace around each name is ignored.
+ */
+public class FrameSchema {
+
+    private List<ValueType> schema = null;
+
+    /**
+     * Constructor that creates a FrameSchema with no schema set.
+     */
+    public FrameSchema() {
+    }
+
+    /**
+     * Constructor that specifies the schema as a list of {@code ValueType}
+     * values. The list is stored by reference, not copied.
+     *
+     * @param schema
+     *            the frame schema
+     */
+    public FrameSchema(List<ValueType> schema) {
+        this.schema = schema;
+    }
+
+    /**
+     * Constructor that specifies the schema as a comma-separated string.
+     *
+     * @param schema
+     *            the frame schema as a string; a blank or {@code null} string
+     *            leaves the schema unset
+     * @throws IllegalArgumentException
+     *             if an entry is not the name of a {@code ValueType}
+     */
+    public FrameSchema(String schema) {
+        this.schema = schemaStringToListOfValueTypes(schema);
+    }
+
+    /**
+     * Obtain the frame schema
+     *
+     * @return the frame schema as a list of {@code ValueType} values, or
+     *         {@code null} if no schema has been set
+     */
+    public List<ValueType> getSchema() {
+        return schema;
+    }
+
+    /**
+     * Set the frame schema. The list is stored by reference, not copied.
+     *
+     * @param schema
+     *            the frame schema
+     */
+    public void setSchema(List<ValueType> schema) {
+        this.schema = schema;
+    }
+
+    /**
+     * Set the frame schema, specifying the frame schema as a comma-separated
+     * string
+     *
+     * @param schema
+     *            the frame schema as a string; a blank or {@code null} string
+     *            clears the schema
+     * @throws IllegalArgumentException
+     *             if an entry is not the name of a {@code ValueType}
+     */
+    public void setSchemaAsString(String schema) {
+        this.schema = schemaStringToListOfValueTypes(schema);
+    }
+
+    /**
+     * Convert a schema string to a list of {@code ValueType} values
+     *
+     * @param schemaString
+     *            the frame schema as a comma-separated string
+     * @return the frame schema as a list of {@code ValueType} values, or
+     *         {@code null} if the string is blank
+     * @throws IllegalArgumentException
+     *             if an entry is not the name of a {@code ValueType}
+     */
+    private List<ValueType> schemaStringToListOfValueTypes(String schemaString) {
+        if (StringUtils.isBlank(schemaString)) {
+            return null;
+        }
+        String[] cols = schemaString.split(",");
+        List<ValueType> list = new ArrayList<ValueType>(cols.length);
+        for (String col : cols) {
+            // Trim so that input such as "double, string" parses, and
+            // upper-case with Locale.ROOT so the lookup does not depend on the
+            // JVM default locale (a Turkish locale upper-cases 'i' to a dotted
+            // capital I, which would make "int" unresolvable).
+            list.add(ValueType.valueOf(col.trim().toUpperCase(Locale.ROOT)));
+        }
+        return list;
+    }
+
+    /**
+     * Obtain the schema as a comma-separated string
+     *
+     * @return the frame schema as a string, or {@code null} if the schema is
+     *         unset or empty
+     */
+    public String getSchemaAsString() {
+        if ((schema == null) || schema.isEmpty()) {
+            return null;
+        }
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < schema.size(); i++) {
+            // Separator goes between entries only, never after the last one.
+            if (i > 0) {
+                sb.append(",");
+            }
+            sb.append(schema.get(i));
+        }
+        return sb.toString();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
index 7feb86a..b0f8432 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
@@ -198,16 +198,16 @@ public class MLContextConversionUtil {
* name of the variable associated with the frame
* @param frameBlock
* frame as a FrameBlock
- * @param matrixMetadata
- * the matrix metadata
+ * @param frameMetadata
+ * the frame metadata
* @return the {@code FrameBlock} converted to a {@code FrameObject}
*/
public static FrameObject frameBlockToFrameObject(String variableName, FrameBlock frameBlock,
- MatrixMetadata matrixMetadata) {
+ FrameMetadata frameMetadata) {
try {
MatrixCharacteristics matrixCharacteristics;
- if (matrixMetadata != null) {
- matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
+ if (frameMetadata != null) {
+ matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
} else {
matrixCharacteristics = new MatrixCharacteristics();
}
@@ -273,16 +273,15 @@ public class MLContextConversionUtil {
}
/**
- * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a
- * {@code FrameObject}.
+ * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a {@code FrameObject}.
*
* @param variableName
* name of the variable associated with the frame
* @param binaryBlocks
- * {@code JavaPairRDD<Long, FrameBlock>} representation
- * of a binary-block frame
- * @return the {@code JavaPairRDD<Long, FrameBlock>} frame
- * converted to a {@code FrameObject}
+ * {@code JavaPairRDD<Long, FrameBlock>} representation of a
+ * binary-block frame
+ * @return the {@code JavaPairRDD<Long, FrameBlock>} frame converted to a
+ * {@code FrameObject}
*/
public static FrameObject binaryBlocksToFrameObject(String variableName,
JavaPairRDD<Long, FrameBlock> binaryBlocks) {
@@ -290,33 +289,32 @@ public class MLContextConversionUtil {
}
/**
- * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a
- * {@code FrameObject}.
+ * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a {@code FrameObject}.
*
* @param variableName
* name of the variable associated with the frame
* @param binaryBlocks
- * {@code JavaPairRDD<Long, FrameBlock>} representation
- * of a binary-block frame
- * @param matrixMetadata
- * the matrix metadata
- * @return the {@code JavaPairRDD<Long, FrameBlock>} frame
- * converted to a {@code FrameObject}
+ * {@code JavaPairRDD<Long, FrameBlock>} representation of a
+ * binary-block frame
+ * @param frameMetadata
+ * the frame metadata
+ * @return the {@code JavaPairRDD<Long, FrameBlock>} frame converted to a
+ * {@code FrameObject}
*/
- public static FrameObject binaryBlocksToFrameObject(String variableName,
- JavaPairRDD<Long, FrameBlock> binaryBlocks, MatrixMetadata matrixMetadata) {
+ public static FrameObject binaryBlocksToFrameObject(String variableName, JavaPairRDD<Long, FrameBlock> binaryBlocks,
+ FrameMetadata frameMetadata) {
MatrixCharacteristics matrixCharacteristics;
- if (matrixMetadata != null) {
- matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
+ if (frameMetadata != null) {
+ matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
} else {
matrixCharacteristics = new MatrixCharacteristics();
}
- MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics,
- OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
- FrameObject frameObject = new FrameObject(MLContextUtil.scratchSpace() + "/" + "temp_"
- + System.nanoTime() + variableName, mtd);
+ MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo,
+ InputInfo.BinaryBlockInputInfo);
+ FrameObject frameObject = new FrameObject(
+ MLContextUtil.scratchSpace() + "/" + "temp_" + System.nanoTime() + variableName, mtd);
frameObject.setRDDHandle(new RDDObject(binaryBlocks, variableName));
return frameObject;
}
@@ -352,8 +350,8 @@ public class MLContextConversionUtil {
if (matrixMetadata == null) {
matrixMetadata = new MatrixMetadata();
}
- JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlock = MLContextConversionUtil.dataFrameToBinaryBlocks(dataFrame,
- matrixMetadata);
+ JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlock = MLContextConversionUtil
+ .dataFrameToMatrixBinaryBlocks(dataFrame, matrixMetadata);
MatrixObject matrixObject = MLContextConversionUtil.binaryBlocksToMatrixObject(variableName, binaryBlock,
matrixMetadata);
return matrixObject;
@@ -368,9 +366,8 @@ public class MLContextConversionUtil {
* the Spark {@code DataFrame}
* @return the {@code DataFrame} frame converted to a
* {@code FrameObject}
- * @throws DMLRuntimeException
*/
- public static FrameObject dataFrameToFrameObject(String variableName, DataFrame dataFrame) throws DMLRuntimeException {
+ public static FrameObject dataFrameToFrameObject(String variableName, DataFrame dataFrame) {
return dataFrameToFrameObject(variableName, dataFrame, null);
}
@@ -381,28 +378,38 @@ public class MLContextConversionUtil {
* name of the variable associated with the frame
* @param dataFrame
* the Spark {@code DataFrame}
- * @param matrixMetadata
- * the matrix metadata
+ * @param frameMetadata
+ * the frame metadata
* @return the {@code DataFrame} frame converted to a
* {@code FrameObject}
- * @throws DMLRuntimeException
*/
public static FrameObject dataFrameToFrameObject(String variableName, DataFrame dataFrame,
- MatrixMetadata matrixMetadata) throws DMLRuntimeException {
- if (matrixMetadata == null) {
- matrixMetadata = new MatrixMetadata();
- }
+ FrameMetadata frameMetadata) {
+ try {
+ if (frameMetadata == null) {
+ frameMetadata = new FrameMetadata();
+ }
- JavaSparkContext javaSparkContext = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext());
- boolean containsID = isDataFrameWithIDColumn(matrixMetadata);
- MatrixCharacteristics matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
- JavaPairRDD<Long, FrameBlock> binaryBlock =
- FrameRDDConverterUtils.dataFrameToBinaryBlock(javaSparkContext, dataFrame,
- matrixCharacteristics, containsID);
+ JavaSparkContext javaSparkContext = MLContextUtil
+ .getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext());
+ boolean containsID = isDataFrameWithIDColumn(frameMetadata);
+ MatrixCharacteristics matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
+ if (matrixCharacteristics == null) {
+ matrixCharacteristics = new MatrixCharacteristics();
+ long rows = dataFrame.count();
+ int cols = dataFrame.columns().length;
+ matrixCharacteristics.setDimension(rows, cols);
+ frameMetadata.setMatrixCharacteristics(matrixCharacteristics);
+ }
+ JavaPairRDD<Long, FrameBlock> binaryBlock = FrameRDDConverterUtils.dataFrameToBinaryBlock(javaSparkContext,
+ dataFrame, matrixCharacteristics, containsID);
- FrameObject frameObject = MLContextConversionUtil.binaryBlocksToFrameObject(variableName, binaryBlock,
- matrixMetadata);
- return frameObject;
+ FrameObject frameObject = MLContextConversionUtil.binaryBlocksToFrameObject(variableName, binaryBlock,
+ frameMetadata);
+ return frameObject;
+ } catch (DMLRuntimeException e) {
+ throw new MLContextException("Exception converting DataFrame to FrameObject", e);
+ }
}
/**
@@ -415,8 +422,8 @@ public class MLContextConversionUtil {
* {@code JavaPairRDD<MatrixIndexes,
* MatrixBlock>} binary-block matrix
*/
- public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlocks(DataFrame dataFrame) {
- return dataFrameToBinaryBlocks(dataFrame, null);
+ public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToMatrixBinaryBlocks(DataFrame dataFrame) {
+ return dataFrameToMatrixBinaryBlocks(dataFrame, null);
}
/**
@@ -431,7 +438,7 @@ public class MLContextConversionUtil {
* {@code JavaPairRDD<MatrixIndexes,
* MatrixBlock>} binary-block matrix
*/
- public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlocks(DataFrame dataFrame,
+ public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToMatrixBinaryBlocks(DataFrame dataFrame,
MatrixMetadata matrixMetadata) {
determineMatrixFormatIfNeeded(dataFrame, matrixMetadata);
@@ -467,6 +474,23 @@ public class MLContextConversionUtil {
}
/**
+ * Convert a {@code DataFrame} to a {@code JavaPairRDD<Long, FrameBlock>}
+ * binary-block frame.
+ * <p>
+ * This conversion is not implemented: the method body consists solely of a
+ * {@code throw}, so neither argument is read and nothing is returned.
+ *
+ * @param dataFrame
+ * the Spark {@code DataFrame}
+ * @param frameMetadata
+ * the frame metadata
+ * @return the {@code DataFrame} converted to a
+ * {@code JavaPairRDD<Long, FrameBlock>} binary-block frame (never
+ * reached in the current implementation)
+ * @throws MLContextException
+ * always, with the message "dataFrameToFrameBinaryBlocks is
+ * unimplemented"
+ */
+ public static JavaPairRDD<Long, FrameBlock> dataFrameToFrameBinaryBlocks(DataFrame dataFrame,
+ FrameMetadata frameMetadata) {
+ // Placeholder: fail loudly rather than return an empty or null RDD.
+ throw new MLContextException("dataFrameToFrameBinaryBlocks is unimplemented");
+ }
+
+ /**
* If the MatrixFormat of the DataFrame has not been explicitly specified,
* attempt to determine the proper MatrixFormat.
*
@@ -530,6 +554,25 @@ public class MLContextConversionUtil {
}
/**
+ * Return whether or not the DataFrame has an ID column.
+ *
+ * @param frameMetadata
+ * the frame metadata
+ * @return {@code true} if the DataFrame has an ID column, {@code false}
+ * otherwise.
+ */
+ public static boolean isDataFrameWithIDColumn(FrameMetadata frameMetadata) {
+     // Missing metadata or a missing frame format both mean "no ID column".
+     FrameFormat format = (frameMetadata == null) ? null : frameMetadata.getFrameFormat();
+     return (format != null) && format.hasIDColumn();
+ }
+
+ /**
* Return whether or not the DataFrame is vector-based.
*
* @param matrixMetadata
@@ -645,27 +688,29 @@ public class MLContextConversionUtil {
* name of the variable associated with the frame
* @param javaRDD
* the Java RDD of strings
- * @param matrixMetadata
- * matrix metadata
+ * @param frameMetadata
+ * frame metadata
* @return the {@code JavaRDD<String>} converted to a {@code FrameObject}
*/
public static FrameObject javaRDDStringCSVToFrameObject(String variableName, JavaRDD<String> javaRDD,
- MatrixMetadata matrixMetadata) {
+ FrameMetadata frameMetadata) {
JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
MatrixCharacteristics matrixCharacteristics;
- if (matrixMetadata != null) {
- matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
+ if (frameMetadata != null) {
+ matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
} else {
matrixCharacteristics = new MatrixCharacteristics();
}
JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
-
+
JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext());
- FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+ FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics,
+ OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
JavaPairRDD<Long, FrameBlock> rdd;
try {
- rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, false, ",", false, -1);
+ rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, false, ",",
+ false, -1);
} catch (DMLRuntimeException e) {
e.printStackTrace();
return null;
@@ -710,30 +755,31 @@ public class MLContextConversionUtil {
* name of the variable associated with the frame
* @param javaRDD
* the Java RDD of strings
- * @param matrixMetadata
- * matrix metadata
+ * @param frameMetadata
+ * frame metadata
* @return the {@code JavaRDD<String>} converted to a {@code FrameObject}
*/
public static FrameObject javaRDDStringIJVToFrameObject(String variableName, JavaRDD<String> javaRDD,
- MatrixMetadata matrixMetadata) {
+ FrameMetadata frameMetadata) {
JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
MatrixCharacteristics matrixCharacteristics;
- if (matrixMetadata != null) {
- matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
+ if (frameMetadata != null) {
+ matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
} else {
matrixCharacteristics = new MatrixCharacteristics();
}
-
+
JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
-
+
JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext());
- FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+ FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics,
+ OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
JavaPairRDD<Long, FrameBlock> rdd;
try {
List<ValueType> lschema = null;
- if(lschema == null)
- lschema = Collections.nCopies((int)matrixCharacteristics.getCols(), ValueType.STRING);
+ if (lschema == null)
+ lschema = Collections.nCopies((int) matrixCharacteristics.getCols(), ValueType.STRING);
rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, lschema);
} catch (DMLRuntimeException e) {
e.printStackTrace();
@@ -794,15 +840,15 @@ public class MLContextConversionUtil {
* name of the variable associated with the frame
* @param rdd
* the RDD of strings
- * @param matrixMetadata
+ * @param frameMetadata
* frame metadata
* @return the {@code RDD<String>} converted to a {@code FrameObject}
*/
public static FrameObject rddStringCSVToFrameObject(String variableName, RDD<String> rdd,
- MatrixMetadata matrixMetadata) {
+ FrameMetadata frameMetadata) {
ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
JavaRDD<String> javaRDD = JavaRDD.fromRDD(rdd, tag);
- return javaRDDStringCSVToFrameObject(variableName, javaRDD, matrixMetadata);
+ return javaRDDStringCSVToFrameObject(variableName, javaRDD, frameMetadata);
}
/**
@@ -832,15 +878,15 @@ public class MLContextConversionUtil {
* name of the variable associated with the frame
* @param rdd
* the RDD of strings
- * @param matrixMetadata
+ * @param frameMetadata
* frame metadata
* @return the {@code RDD<String>} converted to a {@code FrameObject}
*/
public static FrameObject rddStringIJVToFrameObject(String variableName, RDD<String> rdd,
- MatrixMetadata matrixMetadata) {
+ FrameMetadata frameMetadata) {
ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
JavaRDD<String> javaRDD = JavaRDD.fromRDD(rdd, tag);
- return javaRDDStringIJVToFrameObject(variableName, javaRDD, matrixMetadata);
+ return javaRDDStringIJVToFrameObject(variableName, javaRDD, frameMetadata);
}
/**
@@ -898,8 +944,7 @@ public class MLContextConversionUtil {
}
/**
- * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in CSV
- * format.
+ * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in CSV format.
*
* @param frameObject
* the {@code FrameObject}
@@ -908,7 +953,7 @@ public class MLContextConversionUtil {
public static JavaRDD<String> frameObjectToJavaRDDStringCSV(FrameObject frameObject, String delimiter) {
List<String> list = frameObjectToListStringCSV(frameObject, delimiter);
- JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext)MLContextProxy.getActiveMLContext());
+ JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext());
JavaRDD<String> javaRDDStringCSV = jsc.parallelize(list);
return javaRDDStringCSV;
}
@@ -933,8 +978,7 @@ public class MLContextConversionUtil {
}
/**
- * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in IJV
- * format.
+ * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in IJV format.
*
* @param frameObject
* the {@code FrameObject}
@@ -943,7 +987,7 @@ public class MLContextConversionUtil {
public static JavaRDD<String> frameObjectToJavaRDDStringIJV(FrameObject frameObject) {
List<String> list = frameObjectToListStringIJV(frameObject);
- JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext)MLContextProxy.getActiveMLContext());
+ JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext());
JavaRDD<String> javaRDDStringIJV = jsc.parallelize(list);
return javaRDDStringIJV;
}
@@ -1343,5 +1387,49 @@ public class MLContextConversionUtil {
throw new MLContextException("DMLRuntimeException while converting matrix object to BinaryBlockMatrix", e);
}
}
-
+
+ /**
+  * Convert a {@code FrameObject} to a {@code BinaryBlockFrame}.
+  * <p>
+  * The binary-block RDD is obtained from the Spark execution context, and
+  * the accompanying metadata is assembled from the frame object's schema
+  * and matrix characteristics.
+  *
+  * @param frameObject
+  *            the {@code FrameObject}
+  * @param sparkExecutionContext
+  *            the Spark execution context
+  * @return the {@code FrameObject} converted to a {@code BinaryBlockFrame}
+  * @throws MLContextException
+  *             if a {@code DMLRuntimeException} occurs during conversion
+  */
+ public static BinaryBlockFrame frameObjectToBinaryBlockFrame(FrameObject frameObject,
+         SparkExecutionContext sparkExecutionContext) {
+     try {
+         // The RDD handle needs an unchecked cast to the frame RDD type.
+         @SuppressWarnings("unchecked")
+         JavaPairRDD<Long, FrameBlock> blocks = (JavaPairRDD<Long, FrameBlock>) sparkExecutionContext
+                 .getRDDHandleForFrameObject(frameObject, InputInfo.BinaryBlockInputInfo);
+         MatrixCharacteristics characteristics = frameObject.getMatrixCharacteristics();
+         FrameMetadata metadata = new FrameMetadata(new FrameSchema(frameObject.getSchema()), characteristics);
+         return new BinaryBlockFrame(blocks, metadata);
+     } catch (DMLRuntimeException e) {
+         throw new MLContextException("DMLRuntimeException while converting frame object to BinaryBlockFrame", e);
+     }
+ }
+
+ /**
+  * Convert a {@code FrameObject} to a two-dimensional string array.
+  * <p>
+  * The frame object is acquired for reading, converted, and then released.
+  * The release happens in a {@code finally} block so that the frame object
+  * is not left acquired when the conversion itself fails.
+  *
+  * @param frameObject
+  *            the {@code FrameObject}
+  * @return the {@code FrameObject} converted to a {@code String[][]}
+  * @throws MLContextException
+  *             if acquiring, converting, or releasing the frame fails
+  */
+ public static String[][] frameObjectTo2DStringArray(FrameObject frameObject) {
+     try {
+         FrameBlock fb = frameObject.acquireRead();
+         try {
+             return DataConverter.convertToStringFrame(fb);
+         } finally {
+             // Always pair acquireRead() with release(), even on failure.
+             frameObject.release();
+         }
+     } catch (CacheException e) {
+         throw new MLContextException("CacheException while converting frame object to 2D string array", e);
+     } catch (DMLRuntimeException e) {
+         throw new MLContextException("DMLRuntimeException while converting frame object to 2D string array", e);
+     }
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
index 9813174..566fba1 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
@@ -25,7 +25,6 @@ import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
-import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -39,14 +38,15 @@ import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
import org.apache.sysml.conf.CompilerConfig;
import org.apache.sysml.conf.CompilerConfig.ConfigType;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.parser.ParseException;
-import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
import org.apache.sysml.runtime.controlprogram.caching.FrameObject;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
@@ -76,7 +76,8 @@ public final class MLContextUtil {
*/
@SuppressWarnings("rawtypes")
public static final Class[] COMPLEX_DATA_TYPES = { JavaRDD.class, RDD.class, DataFrame.class,
- BinaryBlockMatrix.class, Matrix.class, (new double[][] {}).getClass(), MatrixBlock.class, URL.class };
+ BinaryBlockMatrix.class, BinaryBlockFrame.class, Matrix.class, Frame.class, (new double[][] {}).getClass(),
+ MatrixBlock.class, URL.class };
/**
* All data types supported by the MLContext API
@@ -157,8 +158,8 @@ public final class MLContextUtil {
*/
public static void verifySparkVersionSupported(SparkContext sc) {
if (!MLContextUtil.isSparkVersionSupported(sc.version())) {
- throw new MLContextException("SystemML requires Spark " + MLContext.SYSTEMML_MINIMUM_SPARK_VERSION
- + " or greater");
+ throw new MLContextException(
+ "SystemML requires Spark " + MLContext.SYSTEMML_MINIMUM_SPARK_VERSION + " or greater");
}
}
@@ -201,40 +202,6 @@ public final class MLContextUtil {
}
/**
- * Convenience method to generate a {@code Map<String, Object>} of key/value
- * pairs.
- * <p>
- * Example:<br>
- * {@code Map<String, Object> inputMap = MLContextUtil.generateInputMap("A", 1, "B", "two", "C", 3);}
- * <br>
- * <br>
- * This is equivalent to:<br>
- * <code>Map<String, Object> inputMap = new LinkedHashMap<String, Object>(){{
- * <br>put("A", 1);
- * <br>put("B", "two");
- * <br>put("C", 3);
- * <br>}};</code>
- *
- * @param objs
- * List of String/Object pairs
- * @return Map of String/Object pairs
- * @throws MLContextException
- * if the number of arguments is not an even number
- */
- public static Map<String, Object> generateInputMap(Object... objs) {
- int len = objs.length;
- if ((len & 1) == 1) {
- throw new MLContextException("The number of arguments needs to be an even number");
- }
- Map<String, Object> map = new LinkedHashMap<String, Object>();
- int i = 0;
- while (i < len) {
- map.put((String) objs[i++], objs[i++]);
- }
- return map;
- }
-
- /**
* Verify that the types of input values are supported.
*
* @param inputs
@@ -314,8 +281,8 @@ public final class MLContextUtil {
}
}
if (!supported) {
- throw new MLContextException("Input parameter (\"" + parameterName + "\") value type not supported: "
- + o.getClass());
+ throw new MLContextException(
+ "Input parameter (\"" + parameterName + "\") value type not supported: " + o.getClass());
}
}
@@ -412,7 +379,7 @@ public final class MLContextUtil {
* @return input in SystemML data representation
*/
public static Data convertInputType(String parameterName, Object parameterValue) {
- return convertInputType(parameterName, parameterValue, null, false);
+ return convertInputType(parameterName, parameterValue, null);
}
/**
@@ -422,15 +389,16 @@ public final class MLContextUtil {
* The name of the input parameter
* @param parameterValue
* The value of the input parameter
- * @param matrixMetadata
- * matrix metadata
- * @param bFrame
- * if input is of type frame
+ * @param metadata
+ * matrix/frame metadata
* @return input in SystemML data representation
*/
- public static Data convertInputType(String parameterName, Object parameterValue, MatrixMetadata matrixMetadata, boolean bFrame) {
+ public static Data convertInputType(String parameterName, Object parameterValue, Metadata metadata) {
String name = parameterName;
Object value = parameterValue;
+ boolean hasMetadata = (metadata != null) ? true : false;
+ boolean hasMatrixMetadata = hasMetadata && (metadata instanceof MatrixMetadata) ? true : false;
+ boolean hasFrameMetadata = hasMetadata && (metadata instanceof FrameMetadata) ? true : false;
if (name == null) {
throw new MLContextException("Input parameter name is null");
} else if (value == null) {
@@ -438,91 +406,138 @@ public final class MLContextUtil {
} else if (value instanceof JavaRDD<?>) {
@SuppressWarnings("unchecked")
JavaRDD<String> javaRDD = (JavaRDD<String>) value;
- if(!bFrame) {
+
+ if (hasMatrixMetadata) {
+ MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
MatrixObject matrixObject;
- if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) {
- matrixObject = MLContextConversionUtil.javaRDDStringIJVToMatrixObject(name, javaRDD, matrixMetadata);
+ if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
+ matrixObject = MLContextConversionUtil.javaRDDStringIJVToMatrixObject(name, javaRDD,
+ matrixMetadata);
} else {
- matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD, matrixMetadata);
+ matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD,
+ matrixMetadata);
}
return matrixObject;
- } else {
+ } else if (hasFrameMetadata) {
+ FrameMetadata frameMetadata = (FrameMetadata) metadata;
FrameObject frameObject;
- if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) {
- frameObject = MLContextConversionUtil.javaRDDStringIJVToFrameObject(name, javaRDD, matrixMetadata);
+ if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
+ frameObject = MLContextConversionUtil.javaRDDStringIJVToFrameObject(name, javaRDD, frameMetadata);
} else {
- frameObject = MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD, matrixMetadata);
+ frameObject = MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD, frameMetadata);
}
return frameObject;
+ } else if (!hasMetadata) {
+ String firstLine = javaRDD.first();
+ boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
+ if (isAllNumbers) {
+ MatrixObject matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD);
+ return matrixObject;
+ } else {
+ FrameObject frameObject = MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD);
+ return frameObject;
+ }
}
+
} else if (value instanceof RDD<?>) {
@SuppressWarnings("unchecked")
RDD<String> rdd = (RDD<String>) value;
- if(!bFrame) {
+
+ if (hasMatrixMetadata) {
+ MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
MatrixObject matrixObject;
- if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) {
+ if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
matrixObject = MLContextConversionUtil.rddStringIJVToMatrixObject(name, rdd, matrixMetadata);
} else {
matrixObject = MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd, matrixMetadata);
}
return matrixObject;
- } else {
+ } else if (hasFrameMetadata) {
+ FrameMetadata frameMetadata = (FrameMetadata) metadata;
FrameObject frameObject;
- if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) {
- frameObject = MLContextConversionUtil.rddStringIJVToFrameObject(name, rdd, matrixMetadata);
+ if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
+ frameObject = MLContextConversionUtil.rddStringIJVToFrameObject(name, rdd, frameMetadata);
} else {
- frameObject = MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd, matrixMetadata);
+ frameObject = MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd, frameMetadata);
}
return frameObject;
+ } else if (!hasMetadata) {
+ String firstLine = rdd.first();
+ boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
+ if (isAllNumbers) {
+ MatrixObject matrixObject = MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd);
+ return matrixObject;
+ } else {
+ FrameObject frameObject = MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd);
+ return frameObject;
+ }
}
-
} else if (value instanceof MatrixBlock) {
MatrixBlock matrixBlock = (MatrixBlock) value;
MatrixObject matrixObject = MLContextConversionUtil.matrixBlockToMatrixObject(name, matrixBlock,
- matrixMetadata);
+ (MatrixMetadata) metadata);
return matrixObject;
} else if (value instanceof FrameBlock) {
FrameBlock frameBlock = (FrameBlock) value;
FrameObject frameObject = MLContextConversionUtil.frameBlockToFrameObject(name, frameBlock,
- matrixMetadata);
+ (FrameMetadata) metadata);
return frameObject;
} else if (value instanceof DataFrame) {
DataFrame dataFrame = (DataFrame) value;
- if(!bFrame) {
- MatrixObject matrixObject = MLContextConversionUtil
- .dataFrameToMatrixObject(name, dataFrame, matrixMetadata);
+
+ if (hasMatrixMetadata) {
+ MatrixObject matrixObject = MLContextConversionUtil.dataFrameToMatrixObject(name, dataFrame,
+ (MatrixMetadata) metadata);
return matrixObject;
- } else {
- FrameObject frameObject = null;
- try {
- frameObject = MLContextConversionUtil
- .dataFrameToFrameObject(name, dataFrame, matrixMetadata);
- } catch (DMLRuntimeException e) {
- e.printStackTrace();
- }
+ } else if (hasFrameMetadata) {
+ FrameObject frameObject = MLContextConversionUtil.dataFrameToFrameObject(name, dataFrame,
+ (FrameMetadata) metadata);
return frameObject;
+ } else if (!hasMetadata) {
+ Row firstRow = dataFrame.first();
+ boolean looksLikeMatrix = doesRowLookLikeMatrixRow(firstRow);
+ if (looksLikeMatrix) {
+ MatrixObject matrixObject = MLContextConversionUtil.dataFrameToMatrixObject(name, dataFrame);
+ return matrixObject;
+ } else {
+ FrameObject frameObject = MLContextConversionUtil.dataFrameToFrameObject(name, dataFrame);
+ return frameObject;
+ }
}
} else if (value instanceof BinaryBlockMatrix) {
BinaryBlockMatrix binaryBlockMatrix = (BinaryBlockMatrix) value;
- if (matrixMetadata == null) {
- matrixMetadata = binaryBlockMatrix.getMatrixMetadata();
+ if (metadata == null) {
+ metadata = binaryBlockMatrix.getMatrixMetadata();
}
JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks = binaryBlockMatrix.getBinaryBlocks();
MatrixObject matrixObject = MLContextConversionUtil.binaryBlocksToMatrixObject(name, binaryBlocks,
- matrixMetadata);
+ (MatrixMetadata) metadata);
return matrixObject;
+ } else if (value instanceof BinaryBlockFrame) {
+ BinaryBlockFrame binaryBlockFrame = (BinaryBlockFrame) value;
+ if (metadata == null) {
+ metadata = binaryBlockFrame.getFrameMetadata();
+ }
+ JavaPairRDD<Long, FrameBlock> binaryBlocks = binaryBlockFrame.getBinaryBlocks();
+ FrameObject frameObject = MLContextConversionUtil.binaryBlocksToFrameObject(name, binaryBlocks,
+ (FrameMetadata) metadata);
+ return frameObject;
} else if (value instanceof Matrix) {
Matrix matrix = (Matrix) value;
MatrixObject matrixObject = matrix.asMatrixObject();
return matrixObject;
+ } else if (value instanceof Frame) {
+ Frame frame = (Frame) value;
+ FrameObject frameObject = frame.asFrameObject();
+ return frameObject;
} else if (value instanceof double[][]) {
double[][] doubleMatrix = (double[][]) value;
MatrixObject matrixObject = MLContextConversionUtil.doubleMatrixToMatrixObject(name, doubleMatrix,
- matrixMetadata);
+ (MatrixMetadata) metadata);
return matrixObject;
} else if (value instanceof URL) {
URL url = (URL) value;
- MatrixObject matrixObject = MLContextConversionUtil.urlToMatrixObject(name, url, matrixMetadata);
+ MatrixObject matrixObject = MLContextConversionUtil.urlToMatrixObject(name, url, (MatrixMetadata) metadata);
return matrixObject;
} else if (value instanceof Integer) {
Integer i = (Integer) value;
@@ -545,6 +560,56 @@ public final class MLContextUtil {
}
/**
+ * If no metadata is supplied for an RDD or JavaRDD, this method can be used
+ * to determine whether the data appears to be a matrix (or a frame).
+ * <p>
+ * Every comma-separated field must parse as a double. Empty fields (for
+ * example in {@code "1,,3"}, {@code "1,2,"} or {@code ","}) are not numbers,
+ * so such a line is not considered all-numeric.
+ *
+ * @param line
+ * a line of the RDD
+ * @return {@code true} if all the csv-separated values are numbers,
+ * {@code false} otherwise
+ */
+ public static boolean isCSVLineAllNumbers(String line) {
+ if (StringUtils.isBlank(line)) {
+ return false;
+ }
+ // A negative limit keeps trailing empty fields. The default split drops
+ // them, which made lines such as "1,2," and "," look all-numeric.
+ String[] parts = line.split(",", -1);
+ for (String part : parts) {
+ try {
+ Double.parseDouble(part.trim());
+ } catch (NumberFormatException e) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * If no metadata is supplied for a DataFrame, this method can be used to
+ * determine whether the data appears to be a matrix (or a frame).
+ * <p>
+ * Columns are examined in order. The row is reported as a matrix row as
+ * soon as a {@code Vector} value is encountered, or if every value's string
+ * form parses as a double. A {@code null} value is treated as non-numeric.
+ *
+ * @param row
+ * a row in the DataFrame
+ * @return {@code true} if the row appears to be a matrix row, {@code false}
+ * otherwise
+ */
+ public static boolean doesRowLookLikeMatrixRow(Row row) {
+ for (int i = 0; i < row.length(); i++) {
+ Object object = row.get(i);
+ if (object == null) {
+ // A missing value cannot be parsed as a number. Without this check
+ // object.toString() below throws a NullPointerException.
+ return false;
+ }
+ if (object instanceof Vector) {
+ return true;
+ }
+ try {
+ Double.parseDouble(object.toString());
+ } catch (NumberFormatException e) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
* Return the default matrix block size.
*
* @return the default matrix block size
@@ -559,7 +624,7 @@ public final class MLContextUtil {
* @return the location of the scratch space directory
*/
public static String scratchSpace() {
- return ConfigurationManager.getScratchSpace();
+ return ConfigurationManager.getScratchSpace();
}
/**
@@ -738,7 +803,7 @@ public final class MLContextUtil {
* the map of inputs
* @return the script inputs represented as a String
*/
- public static String displayInputs(String name, Map<String, Object> map) {
+ public static String displayInputs(String name, Map<String, Object> map, LocalVariableMap symbolTable) {
StringBuilder sb = new StringBuilder();
sb.append(name);
sb.append(":\n");
@@ -764,6 +829,11 @@ public final class MLContextUtil {
sb.append(" (");
sb.append(type);
+ if (doesSymbolTableContainMatrixObject(symbolTable, key)) {
+ sb.append(" as Matrix");
+ } else if (doesSymbolTableContainFrameObject(symbolTable, key)) {
+ sb.append(" as Frame");
+ }
sb.append(") ");
sb.append(key);
@@ -890,14 +960,78 @@ public final class MLContextUtil {
return sb.toString();
}
- public static SparkContext getSparkContext(MLContext mlContext)
- {
+ /**
+ * Obtain the Spark Context
+ *
+ * @param mlContext
+ * the SystemML MLContext
+ * @return the Spark Context
+ */
+ public static SparkContext getSparkContext(MLContext mlContext) {
return mlContext.getSparkContext();
}
- public static JavaSparkContext getJavaSparkContext(MLContext mlContext)
- {
+ /**
+ * Obtain the Java Spark Context
+ *
+ * @param mlContext
+ * the SystemML MLContext
+ * @return the Java Spark Context
+ */
+ public static JavaSparkContext getJavaSparkContext(MLContext mlContext) {
return new JavaSparkContext(mlContext.getSparkContext());
}
+ /**
+ * Check whether the given variable name is bound to a FrameObject in the
+ * symbol table.
+ *
+ * @param symbolTable
+ * the LocalVariableMap, may be {@code null}
+ * @param variableName
+ * the variable name
+ * @return {@code true} if the symbol table is non-null and the variable it
+ * holds under this name is a FrameObject, {@code false} otherwise.
+ */
+ public static boolean doesSymbolTableContainFrameObject(LocalVariableMap symbolTable, String variableName) {
+ // instanceof is false for null, so a missing entry needs no separate check
+ return (symbolTable != null) && (symbolTable.get(variableName) instanceof FrameObject);
+ }
+
+ /**
+ * Check whether the given variable name is bound to a MatrixObject in the
+ * symbol table.
+ *
+ * @param symbolTable
+ * the LocalVariableMap, may be {@code null}
+ * @param variableName
+ * the variable name
+ * @return {@code true} if the symbol table is non-null and the variable it
+ * holds under this name is a MatrixObject, {@code false} otherwise.
+ */
+ public static boolean doesSymbolTableContainMatrixObject(LocalVariableMap symbolTable, String variableName) {
+ if (symbolTable == null) {
+ return false;
+ }
+ // instanceof is false for null, so a missing entry needs no separate check
+ Data entry = symbolTable.get(variableName);
+ return entry instanceof MatrixObject;
+ }
+
}